[FFmpeg-cvslog] avcodec/ac3dsp: add missing stddef.h include

2023-12-01 Thread James Almer
ffmpeg | branch: master | James Almer  | Fri Dec  1 12:42:22 
2023 -0300| [6d196112516f5298f263eeb29a8a1626b6e090d4] | committer: James Almer

avcodec/ac3dsp: add missing stddef.h include

Should fix make checkheaders

Signed-off-by: James Almer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6d196112516f5298f263eeb29a8a1626b6e090d4
---

 libavcodec/ac3dsp.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
index ec2f598451..ae33b361a9 100644
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@ -22,6 +22,7 @@
 #ifndef AVCODEC_AC3DSP_H
 #define AVCODEC_AC3DSP_H
 
+#include <stddef.h>
 #include <stdint.h>
 
 /**

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] checkasm/ac3dsp: add float_to_fixed24 test

2023-12-01 Thread sunyuechi
ffmpeg | branch: master | sunyuechi  | Wed Nov 22 
14:57:29 2023 +0800| [d0ec826077c49f4cbf286621771a4a43a9bf57b8] | committer: 
Rémi Denis-Courmont

checkasm/ac3dsp: add float_to_fixed24 test

Signed-off-by: Rémi Denis-Courmont 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d0ec826077c49f4cbf286621771a4a43a9bf57b8
---

 tests/checkasm/Makefile   |  1 +
 tests/checkasm/ac3dsp.c   | 70 +++
 tests/checkasm/checkasm.c |  3 ++
 tests/checkasm/checkasm.h |  1 +
 tests/fate/checkasm.mak   |  1 +
 5 files changed, 76 insertions(+)

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 8bc241d29b..53742c93ae 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -1,5 +1,6 @@
 # libavcodec tests
 # subsystems
+AVCODECOBJS-$(CONFIG_AC3DSP)+= ac3dsp.o
 AVCODECOBJS-$(CONFIG_AUDIODSP)  += audiodsp.o
 AVCODECOBJS-$(CONFIG_BLOCKDSP)  += blockdsp.o
 AVCODECOBJS-$(CONFIG_BSWAPDSP)  += bswapdsp.o
diff --git a/tests/checkasm/ac3dsp.c b/tests/checkasm/ac3dsp.c
new file mode 100644
index 0000000000..8f36f1736c
--- /dev/null
+++ b/tests/checkasm/ac3dsp.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/mem.h"
+#include "libavutil/mem_internal.h"
+
+#include "libavcodec/ac3dsp.h"
+
+#include "checkasm.h"
+
+#define randomize_float(buf, len)   \
+do {\
+int i;  \
+for (i = 0; i < len; i++) { \
+float f = (float)rnd() / (UINT_MAX >> 5) - 16.0f;   \
+buf[i] = f; \
+}   \
+} while (0)
+
+static void check_float_to_fixed24(AC3DSPContext *c) {
+#define BUF_SIZE 1024
+LOCAL_ALIGNED_32(float, src, [BUF_SIZE]);
+
+declare_func(void, int32_t *, const float *, unsigned int);
+
+randomize_float(src, BUF_SIZE);
+
+if (check_func(c->float_to_fixed24, "float_to_fixed24")) {
+LOCAL_ALIGNED_32(int32_t, dst, [BUF_SIZE]);
+LOCAL_ALIGNED_32(int32_t, dst2, [BUF_SIZE]);
+
+call_ref(dst, src, BUF_SIZE);
+call_new(dst2, src, BUF_SIZE);
+
+if (memcmp(dst, dst2, BUF_SIZE) != 0)
+fail();
+
+bench_new(dst, src, BUF_SIZE);
+}
+
+
+report("float_to_fixed24");
+}
+
+void checkasm_check_ac3dsp(void)
+{
+AC3DSPContext c;
+ff_ac3dsp_init(&c);
+
+check_float_to_fixed24(&c);
+}
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index a15e801caf..0a1285eca4 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -77,6 +77,9 @@ static const struct {
 { "aacpsdsp", checkasm_check_aacpsdsp },
 { "sbrdsp",   checkasm_check_sbrdsp },
 #endif
+#if CONFIG_AC3DSP
+{ "ac3dsp", checkasm_check_ac3dsp },
+#endif
 #if CONFIG_ALAC_DECODER
 { "alacdsp", checkasm_check_alacdsp },
 #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 41093f2dca..11d2f7286f 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -44,6 +44,7 @@
 #include "libavutil/timer.h"
 
 void checkasm_check_aacpsdsp(void);
+void checkasm_check_ac3dsp(void);
 void checkasm_check_afir(void);
 void checkasm_check_alacdsp(void);
 void checkasm_check_audiodsp(void);
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index 57b0dff4f2..b8ffa0a77e 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -1,4 +1,5 @@
 FATE_CHECKASM = fate-checkasm-aacpsdsp  \
+fate-checkasm-ac3dsp\
 fate-checkasm-af_afir   \
 fate-checkasm-alacdsp   \
 fate-checkasm-audiodsp  \

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinf

[FFmpeg-cvslog] lavc/aarch64: new optimization for 8-bit hevc_pel_bi_pixels

2023-12-01 Thread Logan Lyu
ffmpeg | branch: master | Logan Lyu  | Sun Nov  5 
16:33:17 2023 +0800| [40cf4a5ca3ce1bdb2623ac0f8a956c27203540ea] | committer: 
Martin Storsjö

lavc/aarch64: new optimization for 8-bit hevc_pel_bi_pixels

put_hevc_pel_bi_pixels4_8_c: 54.7
put_hevc_pel_bi_pixels4_8_neon: 43.0
put_hevc_pel_bi_pixels6_8_c: 94.7
put_hevc_pel_bi_pixels6_8_neon: 37.0
put_hevc_pel_bi_pixels8_8_c: 171.0
put_hevc_pel_bi_pixels8_8_neon: 24.0
put_hevc_pel_bi_pixels12_8_c: 354.0
put_hevc_pel_bi_pixels12_8_neon: 68.7
put_hevc_pel_bi_pixels16_8_c: 588.2
put_hevc_pel_bi_pixels16_8_neon: 77.5
put_hevc_pel_bi_pixels24_8_c: 1670.7
put_hevc_pel_bi_pixels24_8_neon: 173.0
put_hevc_pel_bi_pixels32_8_c: 2267.7
put_hevc_pel_bi_pixels32_8_neon: 281.2
put_hevc_pel_bi_pixels48_8_c: 5787.5
put_hevc_pel_bi_pixels48_8_neon: 673.5
put_hevc_pel_bi_pixels64_8_c: 9897.0
put_hevc_pel_bi_pixels64_8_neon: 1159.5

Co-Authored-By: J. Dekker 
Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=40cf4a5ca3ce1bdb2623ac0f8a956c27203540ea
---

 libavcodec/aarch64/hevcdsp_epel_neon.S| 179 ++
 libavcodec/aarch64/hevcdsp_init_aarch64.c |   6 +
 2 files changed, 185 insertions(+)

diff --git a/libavcodec/aarch64/hevcdsp_epel_neon.S 
b/libavcodec/aarch64/hevcdsp_epel_neon.S
index c077c204cc..b441f26bed 100644
--- a/libavcodec/aarch64/hevcdsp_epel_neon.S
+++ b/libavcodec/aarch64/hevcdsp_epel_neon.S
@@ -244,6 +244,185 @@ function ff_hevc_put_hevc_pel_pixels64_8_neon, export=1
 endfunc
 
 
+function ff_hevc_put_hevc_pel_bi_pixels4_8_neon, export=1
+mov x10, #(MAX_PB_SIZE * 2)
+1:  ld1 {v0.s}[0], [x2], x3 // src
+ushll   v16.8h, v0.8b, #6
+ld1 {v20.4h}, [x4], x10 // src2
+sqadd   v16.8h, v16.8h, v20.8h
+sqrshrunv0.8b,  v16.8h, #7
+st1 {v0.s}[0], [x0], x1
+subsw5, w5, #1
+b.ne1b
+ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels6_8_neon, export=1
+mov x10, #(MAX_PB_SIZE * 2)
+sub x1, x1, #4
+1:  ld1 {v0.8b}, [x2], x3
+ushll   v16.8h, v0.8b, #6
+ld1 {v20.8h}, [x4], x10
+sqadd   v16.8h, v16.8h, v20.8h
+sqrshrunv0.8b,  v16.8h, #7
+st1 {v0.s}[0], [x0], #4
+st1 {v0.h}[2], [x0], x1
+subsw5, w5, #1
+b.ne1b
+ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels8_8_neon, export=1
+mov x10, #(MAX_PB_SIZE * 2)
+1:  ld1 {v0.8b}, [x2], x3// src
+ushll   v16.8h, v0.8b, #6
+ld1 {v20.8h}, [x4], x10  // src2
+sqadd   v16.8h, v16.8h, v20.8h
+sqrshrunv0.8b,  v16.8h, #7
+subsw5, w5, #1
+st1 {v0.8b}, [x0], x1
+b.ne1b
+ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels12_8_neon, export=1
+mov x10, #(MAX_PB_SIZE * 2)
+sub x1, x1, #8
+1:  ld1 {v0.16b}, [x2], x3
+ushll   v16.8h, v0.8b, #6
+ushll2  v17.8h, v0.16b, #6
+ld1 {v20.8h, v21.8h}, [x4], x10
+sqadd   v16.8h, v16.8h, v20.8h
+sqadd   v17.8h, v17.8h, v21.8h
+sqrshrunv0.8b,  v16.8h, #7
+sqrshrun2   v0.16b, v17.8h, #7
+st1 {v0.8b}, [x0], #8
+subsw5, w5, #1
+st1 {v0.s}[2], [x0], x1
+b.ne1b
+ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels16_8_neon, export=1
+mov x10, #(MAX_PB_SIZE * 2)
+1:  ld1 {v0.16b}, [x2], x3 // src
+ushll   v16.8h, v0.8b, #6
+ushll2  v17.8h, v0.16b, #6
+ld1 {v20.8h, v21.8h}, [x4], x10  // src2
+sqadd   v16.8h, v16.8h, v20.8h
+sqadd   v17.8h, v17.8h, v21.8h
+sqrshrunv0.8b,  v16.8h, #7
+sqrshrun2   v0.16b, v17.8h, #7
+subsw5, w5, #1
+st1 {v0.16b}, [x0], x1
+b.ne1b
+ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels24_8_neon, export=1
+mov x10, #(MAX_PB_SIZE * 2)
+1:  ld1 {v0.8b-v2.8b}, [x2], x3 // src
+ushll   v16.8h, v0.8b, #6
+ushll   v17.8h, v1.8b, #6
+ushll   v18.8h, v2.8b, #6
+ld1 {v20.8h-v22.8h}, [x4], x10  // src2
+sqadd   v16.8h, v16.8h, v20.8h
+sqadd   v17.8h, v17.8h, v21.8h
+sqadd   v18.8h, v18.8h, v22.8h
+sqrshrunv0.8b, v16.8h, #7
+sqrshrunv1.8b, v17.8h, #7
+sqrshrunv2.8b, v18.8h, #7
+subsw5, w5,

[FFmpeg-cvslog] lavc/aarch64: new optimization for 8-bit hevc_epel_bi_h

2023-12-01 Thread Logan Lyu
ffmpeg | branch: master | Logan Lyu  | Sat Nov 11 
17:54:35 2023 +0800| [216275bd8098fc4a08fd4c38191c8c217a6b897a] | committer: 
Martin Storsjö

lavc/aarch64: new optimization for 8-bit hevc_epel_bi_h

put_hevc_epel_bi_h4_8_c: 96.0
put_hevc_epel_bi_h4_8_neon: 36.3
put_hevc_epel_bi_h6_8_c: 288.3
put_hevc_epel_bi_h6_8_neon: 59.3
put_hevc_epel_bi_h8_8_c: 358.5
put_hevc_epel_bi_h8_8_neon: 61.5
put_hevc_epel_bi_h12_8_c: 759.8
put_hevc_epel_bi_h12_8_neon: 159.5
put_hevc_epel_bi_h16_8_c: 1307.0
put_hevc_epel_bi_h16_8_neon: 182.0
put_hevc_epel_bi_h24_8_c: 2778.3
put_hevc_epel_bi_h24_8_neon: 430.5
put_hevc_epel_bi_h32_8_c: 4952.3
put_hevc_epel_bi_h32_8_neon: 679.5
put_hevc_epel_bi_h48_8_c: 11803.3
put_hevc_epel_bi_h48_8_neon: 1443.5
put_hevc_epel_bi_h64_8_c: 20654.8
put_hevc_epel_bi_h64_8_neon: 2737.0
put_hevc_qpel_bi_h4_8_c: 140.0
put_hevc_qpel_bi_h4_8_neon: 111.5
put_hevc_qpel_bi_h6_8_c: 318.0
put_hevc_qpel_bi_h6_8_neon: 85.8
put_hevc_qpel_bi_h8_8_c: 536.5
put_hevc_qpel_bi_h8_8_neon: 95.3
put_hevc_qpel_bi_h12_8_c: 1188.5
put_hevc_qpel_bi_h12_8_neon: 291.3
put_hevc_qpel_bi_h16_8_c: 2064.3
put_hevc_qpel_bi_h16_8_neon: 365.3
put_hevc_qpel_bi_h24_8_c: 4757.5
put_hevc_qpel_bi_h24_8_neon: 1010.0
put_hevc_qpel_bi_h32_8_c: 8351.8
put_hevc_qpel_bi_h32_8_neon: 2917.8
put_hevc_qpel_bi_h48_8_c: 19299.8
put_hevc_qpel_bi_h48_8_neon: 2976.8
put_hevc_qpel_bi_h64_8_c: 34182.5
put_hevc_qpel_bi_h64_8_neon: 5236.3

Co-Authored-By: J. Dekker 
Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=216275bd8098fc4a08fd4c38191c8c217a6b897a
---

 libavcodec/aarch64/hevcdsp_epel_neon.S| 257 ++
 libavcodec/aarch64/hevcdsp_init_aarch64.c |   5 +
 2 files changed, 262 insertions(+)

diff --git a/libavcodec/aarch64/hevcdsp_epel_neon.S 
b/libavcodec/aarch64/hevcdsp_epel_neon.S
index b441f26bed..b84d7db1fb 100644
--- a/libavcodec/aarch64/hevcdsp_epel_neon.S
+++ b/libavcodec/aarch64/hevcdsp_epel_neon.S
@@ -423,6 +423,263 @@ function ff_hevc_put_hevc_pel_bi_pixels64_8_neon, export=1
 ret
 endfunc
 
+
+function ff_hevc_put_hevc_epel_bi_h4_8_neon, export=1
+load_epel_filterb x6, x7
+sub x2, x2, #1
+mov x10, #(MAX_PB_SIZE * 2)
+1:  ld1 {v4.8b}, [x2], x3
+ext v5.8b, v4.8b, v4.8b, #1
+ext v6.8b, v4.8b, v4.8b, #2
+ext v7.8b, v4.8b, v4.8b, #3
+calc_epelb  v16, v4, v5, v6, v7
+ld1 {v20.4h}, [x4], x10
+sqadd   v16.8h, v16.8h, v20.8h
+sqrshrunv4.8b, v16.8h, #7
+st1 {v4.s}[0], [x0], x1
+subsw5, w5, #1   // height
+b.ne1b
+ret
+endfunc
+
+function ff_hevc_put_hevc_epel_bi_h6_8_neon, export=1
+load_epel_filterb x6, x7
+sub w1, w1, #4
+sub x2, x2, #1
+mov x10, #(MAX_PB_SIZE * 2)
+1:  ld1 {v24.16b}, [x2], x3
+ext v26.16b, v24.16b, v24.16b, #1
+ext v27.16b, v24.16b, v24.16b, #2
+ext v28.16b, v24.16b, v24.16b, #3
+calc_epelb  v16, v24, v26, v27, v28
+ld1 {v20.8h}, [x4], x10
+sqadd   v16.8h, v16.8h, v20.8h
+sqrshrunv16.8b, v16.8h, #7
+st1 {v16.s}[0], [x0], #4
+st1 {v16.h}[2], [x0], x1
+subsw5, w5, #1   // height
+b.ne1b
+ret
+endfunc
+
+function ff_hevc_put_hevc_epel_bi_h8_8_neon, export=1
+load_epel_filterb x6, x7
+sub x2, x2, #1
+mov x10, #(MAX_PB_SIZE * 2)
+1:  ld1 {v24.16b}, [x2], x3
+ext v26.16b, v24.16b, v24.16b, #1
+ext v27.16b, v24.16b, v24.16b, #2
+ext v28.16b, v24.16b, v24.16b, #3
+calc_epelb  v16, v24, v26, v27, v28
+ld1 {v20.8h}, [x4], x10
+sqadd   v16.8h, v16.8h, v20.8h
+sqrshrunv16.8b, v16.8h, #7
+st1 {v16.8b}, [x0], x1
+subsw5, w5, #1   // height
+b.ne1b
+ret
+endfunc
+
+function ff_hevc_put_hevc_epel_bi_h12_8_neon, export=1
+load_epel_filterb x6, x7
+sub x1, x1, #8
+sub x2, x2, #1
+mov x10, #(MAX_PB_SIZE * 2)
+1:  ld1 {v24.16b}, [x2], x3
+ext v26.16b, v24.16b, v24.16b, #1
+ext v27.16b, v24.16b, v24.16b, #2
+ext v28.16b, v24.16b, v24.16b, #3
+calc_epelb  v16, v24, v26, v27, v28
+calc_epelb2 v17, v24, v26, v27, v28
+ld1 {v20.8h, v21.8h}, [x4], x10
+sqadd   v18.8h, v16.8h, v20.8h
+sqadd   v19.8h, v17.8h, v21.8h
+sqrshrunv20.8b, v18.8h, #7
+sqrs

[FFmpeg-cvslog] lavc/aarch64: new optimization for 8-bit hevc_epel_bi_v

2023-12-01 Thread Logan Lyu
ffmpeg | branch: master | Logan Lyu  | Sat Nov 11 
19:17:36 2023 +0800| [0448f27f41457a058256f0f5145c91e88064e051] | committer: 
Martin Storsjö

lavc/aarch64: new optimization for 8-bit hevc_epel_bi_v

put_hevc_epel_bi_v4_8_c: 138.4
put_hevc_epel_bi_v4_8_neon: 33.7
put_hevc_epel_bi_v6_8_c: 302.9
put_hevc_epel_bi_v6_8_neon: 46.7
put_hevc_epel_bi_v8_8_c: 408.7
put_hevc_epel_bi_v8_8_neon: 48.7
put_hevc_epel_bi_v12_8_c: 779.4
put_hevc_epel_bi_v12_8_neon: 139.7
put_hevc_epel_bi_v16_8_c: 1344.9
put_hevc_epel_bi_v16_8_neon: 160.2
put_hevc_epel_bi_v24_8_c: 2981.7
put_hevc_epel_bi_v24_8_neon: 344.9
put_hevc_epel_bi_v32_8_c: 5280.9
put_hevc_epel_bi_v32_8_neon: 618.4
put_hevc_epel_bi_v48_8_c: 12494.9
put_hevc_epel_bi_v48_8_neon: 1364.4
put_hevc_epel_bi_v64_8_c: 22127.7
put_hevc_epel_bi_v64_8_neon: 2473.7

Co-Authored-By: J. Dekker 
Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0448f27f41457a058256f0f5145c91e88064e051
---

 libavcodec/aarch64/hevcdsp_epel_neon.S| 212 ++
 libavcodec/aarch64/hevcdsp_init_aarch64.c |   5 +
 2 files changed, 217 insertions(+)

diff --git a/libavcodec/aarch64/hevcdsp_epel_neon.S 
b/libavcodec/aarch64/hevcdsp_epel_neon.S
index b84d7db1fb..2f9e7e46c4 100644
--- a/libavcodec/aarch64/hevcdsp_epel_neon.S
+++ b/libavcodec/aarch64/hevcdsp_epel_neon.S
@@ -680,6 +680,218 @@ function ff_hevc_put_hevc_epel_bi_h64_8_neon, export=1
 ret
 endfunc
 
+function ff_hevc_put_hevc_epel_bi_v4_8_neon, export=1
+load_epel_filterb x7, x6
+sub x2, x2, x3
+mov x10, #(MAX_PB_SIZE * 2)
+ld1 {v16.s}[0], [x2], x3
+ld1 {v17.s}[0], [x2], x3
+ld1 {v18.s}[0], [x2], x3
+.macro calc src0, src1, src2, src3
+ld1 {\src3\().s}[0], [x2], x3
+calc_epelb  v4, \src0, \src1, \src2, \src3
+ld1 {v24.4h}, [x4], x10
+sqadd   v4.8h, v4.8h, v24.8h
+sqrshrunv4.8b, v4.8h, #7
+subsw5, w5, #1
+st1 {v4.s}[0], [x0], x1
+.endm
+1:  calc_all4
+.purgem calc
+2:  ret
+endfunc
+
+function ff_hevc_put_hevc_epel_bi_v6_8_neon, export=1
+load_epel_filterb x7, x6
+sub x2, x2, x3
+sub x1, x1, #4
+mov x10, #(MAX_PB_SIZE * 2)
+ld1 {v16.8b}, [x2], x3
+ld1 {v17.8b}, [x2], x3
+ld1 {v18.8b}, [x2], x3
+.macro calc src0, src1, src2, src3
+ld1 {\src3\().8b}, [x2], x3
+calc_epelb  v4, \src0, \src1, \src2, \src3
+ld1 {v24.8h}, [x4], x10
+sqadd   v4.8h, v4.8h, v24.8h
+sqrshrunv4.8b, v4.8h, #7
+st1 {v4.s}[0], [x0], #4
+subsw5, w5, #1
+st1 {v4.h}[2], [x0], x1
+.endm
+1:  calc_all4
+.purgem calc
+2:  ret
+endfunc
+
+function ff_hevc_put_hevc_epel_bi_v8_8_neon, export=1
+load_epel_filterb x7, x6
+sub x2, x2, x3
+mov x10, #(MAX_PB_SIZE * 2)
+ld1 {v16.8b}, [x2], x3
+ld1 {v17.8b}, [x2], x3
+ld1 {v18.8b}, [x2], x3
+.macro calc src0, src1, src2, src3
+ld1 {\src3\().8b}, [x2], x3
+calc_epelb  v4, \src0, \src1, \src2, \src3
+ld1 {v24.8h}, [x4], x10
+sqadd   v4.8h, v4.8h, v24.8h
+sqrshrunv4.8b, v4.8h, #7
+subsw5, w5, #1
+st1 {v4.8b}, [x0], x1
+.endm
+1:  calc_all4
+.purgem calc
+2:  ret
+endfunc
+
+function ff_hevc_put_hevc_epel_bi_v12_8_neon, export=1
+load_epel_filterb x7, x6
+sub x1, x1, #8
+sub x2, x2, x3
+mov x10, #(MAX_PB_SIZE * 2)
+ld1 {v16.16b}, [x2], x3
+ld1 {v17.16b}, [x2], x3
+ld1 {v18.16b}, [x2], x3
+.macro calc src0, src1, src2, src3
+ld1 {\src3\().16b}, [x2], x3
+calc_epelb  v4, \src0, \src1, \src2, \src3
+calc_epelb2 v5, \src0, \src1, \src2, \src3
+ld1 {v24.8h, v25.8h}, [x4], x10
+sqadd   v4.8h, v4.8h, v24.8h
+sqadd   v5.8h, v5.8h, v25.8h
+sqrshrunv4.8b, v4.8h, #7
+sqrshrun2   v4.16b, v5.8h, #7
+st1 {v4.8b}, [x0], #8
+subsw5, w5, #1
+st1 {v4.s}[2], [x0], x1
+.endm
+1:  calc_all4
+.purgem calc
+2:  ret
+endfunc
+
+function ff_hevc_put_hevc_epel_bi_v16_8_neon, export=1
+load_epel_filterb x7, x6
+sub x2, x2, x3
+mov x10, #(MAX_PB_SIZE * 2)
+ld1 {v16.16b}, [x2], x3
+ld1 {v17.16b}, [x2], x3
+ld1 {v18.16b}, [x2], x3
+.mac

[FFmpeg-cvslog] lavc/aarch64: new optimization for 8-bit hevc_epel_bi_hv

2023-12-01 Thread Logan Lyu
ffmpeg | branch: master | Logan Lyu  | Sat Nov 11 
19:57:40 2023 +0800| [00290a64f758acafef80d88bb06760cd7bbd9eac] | committer: 
Martin Storsjö

lavc/aarch64: new optimization for 8-bit hevc_epel_bi_hv

put_hevc_epel_bi_hv4_8_c: 242.9
put_hevc_epel_bi_hv4_8_i8mm: 68.6
put_hevc_epel_bi_hv6_8_c: 402.4
put_hevc_epel_bi_hv6_8_i8mm: 135.9
put_hevc_epel_bi_hv8_8_c: 636.4
put_hevc_epel_bi_hv8_8_i8mm: 145.6
put_hevc_epel_bi_hv12_8_c: 1363.1
put_hevc_epel_bi_hv12_8_i8mm: 324.1
put_hevc_epel_bi_hv16_8_c: 2222.1
put_hevc_epel_bi_hv16_8_i8mm: 509.1
put_hevc_epel_bi_hv24_8_c: 4793.4
put_hevc_epel_bi_hv24_8_i8mm: 1091.9
put_hevc_epel_bi_hv32_8_c: 8393.9
put_hevc_epel_bi_hv32_8_i8mm: 1720.6
put_hevc_epel_bi_hv48_8_c: 19526.6
put_hevc_epel_bi_hv48_8_i8mm: 4285.9
put_hevc_epel_bi_hv64_8_c: 33915.4
put_hevc_epel_bi_hv64_8_i8mm: 6783.6

Co-Authored-By: J. Dekker 
Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=00290a64f758acafef80d88bb06760cd7bbd9eac
---

 libavcodec/aarch64/hevcdsp_epel_neon.S| 330 ++
 libavcodec/aarch64/hevcdsp_init_aarch64.c |   5 +
 2 files changed, 335 insertions(+)

diff --git a/libavcodec/aarch64/hevcdsp_epel_neon.S 
b/libavcodec/aarch64/hevcdsp_epel_neon.S
index 2f9e7e46c4..2dafa09337 100644
--- a/libavcodec/aarch64/hevcdsp_epel_neon.S
+++ b/libavcodec/aarch64/hevcdsp_epel_neon.S
@@ -3203,6 +3203,336 @@ function ff_hevc_put_hevc_epel_uni_w_hv64_8_neon_i8mm, 
export=1
 ret
 endfunc
 
+
+function ff_hevc_put_hevc_epel_bi_hv4_8_neon_i8mm, export=1
+add w10, w5, #3
+lsl x10, x10, #7
+sub sp, sp, x10 // tmp_array
+stp x7, x30, [sp, #-48]!
+stp x4, x5, [sp, #16]
+stp x0, x1, [sp, #32]
+add x0, sp, #48
+sub x1, x2, x3
+mov x2, x3
+add w3, w5, #3
+mov x4, x6
+mov x5, x7
+bl  X(ff_hevc_put_hevc_epel_h4_8_neon_i8mm)
+ldp x4, x5, [sp, #16]
+ldp x0, x1, [sp, #32]
+ldp x7, x30, [sp], #48
+load_epel_filterh x7, x6
+mov x10, #(MAX_PB_SIZE * 2)
+ld1 {v16.4h}, [sp], x10
+ld1 {v17.4h}, [sp], x10
+ld1 {v18.4h}, [sp], x10
+.macro calc src0, src1, src2, src3
+ld1 {\src3\().4h}, [sp], x10
+calc_epelh  v4, \src0, \src1, \src2, \src3
+ld1 {v6.4h}, [x4], x10
+sqadd   v4.4h, v4.4h, v6.4h
+sqrshrunv4.8b, v4.8h, #7
+subsw5, w5, #1
+st1 {v4.s}[0], [x0], x1
+.endm
+1:  calc_all4
+.purgem calc
+2:  ret
+endfunc
+
+function ff_hevc_put_hevc_epel_bi_hv6_8_neon_i8mm, export=1
+add w10, w5, #3
+lsl x10, x10, #7
+sub sp, sp, x10 // tmp_array
+stp x7, x30, [sp, #-48]!
+stp x4, x5, [sp, #16]
+stp x0, x1, [sp, #32]
+add x0, sp, #48
+sub x1, x2, x3
+mov x2, x3
+add w3, w5, #3
+mov x4, x6
+mov x5, x7
+bl  X(ff_hevc_put_hevc_epel_h6_8_neon_i8mm)
+ldp x4, x5, [sp, #16]
+ldp x0, x1, [sp, #32]
+ldp x7, x30, [sp], #48
+load_epel_filterh x7, x6
+sub x1, x1, #4
+mov x10, #(MAX_PB_SIZE * 2)
+ld1 {v16.8h}, [sp], x10
+ld1 {v17.8h}, [sp], x10
+ld1 {v18.8h}, [sp], x10
+.macro calc src0, src1, src2, src3
+ld1 {\src3\().8h}, [sp], x10
+calc_epelh  v4, \src0, \src1, \src2, \src3
+calc_epelh2 v4, v5, \src0, \src1, \src2, \src3
+ld1 {v6.8h}, [x4], x10
+sqadd   v4.8h, v4.8h, v6.8h
+sqrshrunv4.8b, v4.8h, #7
+st1 {v4.s}[0], [x0], #4
+subsw5, w5, #1
+st1 {v4.h}[2], [x0], x1
+.endm
+1:  calc_all4
+.purgem calc
+2:  ret
+endfunc
+
+function ff_hevc_put_hevc_epel_bi_hv8_8_neon_i8mm, export=1
+add w10, w5, #3
+lsl x10, x10, #7
+sub sp, sp, x10 // tmp_array
+stp x7, x30, [sp, #-48]!
+stp x4, x5, [sp, #16]
+stp x0, x1, [sp, #32]
+add x0, sp, #48
+sub x1, x2, x3
+mov x2, x3
+add w3, w5, #3
+mov x4, x6
+mov x5, x7
+bl  X(ff_hevc_put_hevc_epel_h8_8_neon_i8mm)
+ldp x4, x5, [sp, #16]
+ldp 

[FFmpeg-cvslog] lavc/aarch64: new optimization for 8-bit hevc_qpel_bi_v

2023-12-01 Thread Logan Lyu
ffmpeg | branch: master | Logan Lyu  | Sun Nov 12 
08:32:10 2023 +0800| [595f97028b827a14dd979c76468e4da93b3adfd5] | committer: 
Martin Storsjö

lavc/aarch64: new optimization for 8-bit hevc_qpel_bi_v

put_hevc_qpel_bi_v4_8_c: 166.1
put_hevc_qpel_bi_v4_8_neon: 61.9
put_hevc_qpel_bi_v6_8_c: 309.4
put_hevc_qpel_bi_v6_8_neon: 75.6
put_hevc_qpel_bi_v8_8_c: 531.1
put_hevc_qpel_bi_v8_8_neon: 78.1
put_hevc_qpel_bi_v12_8_c: 1139.9
put_hevc_qpel_bi_v12_8_neon: 238.1
put_hevc_qpel_bi_v16_8_c: 2063.6
put_hevc_qpel_bi_v16_8_neon: 308.9
put_hevc_qpel_bi_v24_8_c: 4317.1
put_hevc_qpel_bi_v24_8_neon: 629.9
put_hevc_qpel_bi_v32_8_c: 8241.9
put_hevc_qpel_bi_v32_8_neon: 1140.1
put_hevc_qpel_bi_v48_8_c: 18422.9
put_hevc_qpel_bi_v48_8_neon: 2533.9
put_hevc_qpel_bi_v64_8_c: 37508.6
put_hevc_qpel_bi_v64_8_neon: 4520.1

Co-Authored-By: J. Dekker 
Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=595f97028b827a14dd979c76468e4da93b3adfd5
---

 libavcodec/aarch64/hevcdsp_init_aarch64.c |   5 +
 libavcodec/aarch64/hevcdsp_qpel_neon.S| 248 ++
 2 files changed, 253 insertions(+)

diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c 
b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index c2cbcd95e7..9552549897 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -251,6 +251,10 @@ NEON8_FNPROTO_PARTIAL_5(qpel_uni_w_hv, (uint8_t *_dst,  
ptrdiff_t _dststride,
 int height, int denom, int wx, int ox,
 intptr_t mx, intptr_t my, int width), _i8mm);
 
+NEON8_FNPROTO(qpel_bi_v, (uint8_t *dst, ptrdiff_t dststride,
+const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
+int height, intptr_t mx, intptr_t my, int width),);
+
 #define NEON8_FNASSIGN(member, v, h, fn, ext) \
 member[1][v][h] = ff_hevc_put_hevc_##fn##4_8_neon##ext;  \
 member[2][v][h] = ff_hevc_put_hevc_##fn##6_8_neon##ext;  \
@@ -344,6 +348,7 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, 
const int bit_depth)
 NEON8_FNASSIGN(c->put_hevc_epel_bi, 0, 1, epel_bi_h,);
 NEON8_FNASSIGN(c->put_hevc_epel_bi, 1, 0, epel_bi_v,);
 NEON8_FNASSIGN(c->put_hevc_qpel_bi, 0, 0, pel_bi_pixels,);
+NEON8_FNASSIGN(c->put_hevc_qpel_bi, 1, 0, qpel_bi_v,);
 NEON8_FNASSIGN(c->put_hevc_epel_uni, 0, 0, pel_uni_pixels,);
 NEON8_FNASSIGN(c->put_hevc_epel_uni, 1, 0, epel_uni_v,);
 NEON8_FNASSIGN(c->put_hevc_qpel_uni, 0, 0, pel_uni_pixels,);
diff --git a/libavcodec/aarch64/hevcdsp_qpel_neon.S 
b/libavcodec/aarch64/hevcdsp_qpel_neon.S
index bcee627cba..d01dd24a78 100644
--- a/libavcodec/aarch64/hevcdsp_qpel_neon.S
+++ b/libavcodec/aarch64/hevcdsp_qpel_neon.S
@@ -865,6 +865,254 @@ function ff_hevc_put_hevc_qpel_v64_8_neon, export=1
 ret
 endfunc
 
+function ff_hevc_put_hevc_qpel_bi_v4_8_neon, export=1
+load_qpel_filterb x7, x6
+sub x2, x2, x3, lsl #1
+sub x2, x2, x3
+mov x12, #(MAX_PB_SIZE * 2)
+ld1 {v16.s}[0], [x2], x3
+ld1 {v17.s}[0], [x2], x3
+ld1 {v18.s}[0], [x2], x3
+ld1 {v19.s}[0], [x2], x3
+ld1 {v20.s}[0], [x2], x3
+ld1 {v21.s}[0], [x2], x3
+ld1 {v22.s}[0], [x2], x3
+.macro calc tmp, src0, src1, src2, src3, src4, src5, src6, src7
+ld1 {\tmp\().s}[0], [x2], x3
+moviv24.8h, #0
+calc_qpelb  v24, \src0, \src1, \src2, \src3, \src4, \src5, \src6, 
\src7
+ld1 {v25.4h}, [x4], x12 // src2
+sqadd   v24.8h, v24.8h, v25.8h
+sqrshrunv25.8b, v24.8h, #7
+subsw5, w5, #1
+st1 {v25.s}[0], [x0], x1
+.endm
+1:  calc_all
+.purgem calc
+2:  ret
+endfunc
+
+function ff_hevc_put_hevc_qpel_bi_v6_8_neon, export=1
+load_qpel_filterb x7, x6
+sub x2, x2, x3, lsl #1
+sub x2, x2, x3
+ld1 {v16.8b}, [x2], x3
+sub x1, x1, #4
+ld1 {v17.8b}, [x2], x3
+mov x12, #(MAX_PB_SIZE * 2)
+ld1 {v18.8b}, [x2], x3
+ld1 {v19.8b}, [x2], x3
+ld1 {v20.8b}, [x2], x3
+ld1 {v21.8b}, [x2], x3
+ld1 {v22.8b}, [x2], x3
+.macro calc tmp, src0, src1, src2, src3, src4, src5, src6, src7
+ld1 {\tmp\().8b}, [x2], x3
+moviv24.8h, #0
+calc_qpelb  v24, \src0, \src1, \src2, \src3, \src4, \src5, \src6, 
\src7
+ld1 {v25.8h}, [x4], x12 // src2
+sqadd   v24.8h, v24.8h, v25.8h
+sqrshrunv25.8b, v24.8h, #7
+st1 {v25.s}[0], [x0], #4
+subsw5, w5, #1
+st1 {v25.h}[2], [x0], x1
+.endm
+1:  calc

[FFmpeg-cvslog] lavc/aarch64: new optimization for 8-bit hevc_qpel_bi_hv

2023-12-01 Thread Logan Lyu
ffmpeg | branch: master | Logan Lyu  | Sun Nov 12 
09:03:28 2023 +0800| [fa0470347e326fe1c9f54ab3dcdbdfa67fa5eddd] | committer: 
Martin Storsjö

lavc/aarch64: new optimization for 8-bit hevc_qpel_bi_hv

put_hevc_qpel_bi_hv4_8_c: 433.7
put_hevc_qpel_bi_hv4_8_i8mm: 117.9
put_hevc_qpel_bi_hv6_8_c: 803.9
put_hevc_qpel_bi_hv6_8_i8mm: 252.7
put_hevc_qpel_bi_hv8_8_c: 1296.4
put_hevc_qpel_bi_hv8_8_i8mm: 316.2
put_hevc_qpel_bi_hv12_8_c: 2867.4
put_hevc_qpel_bi_hv12_8_i8mm: 669.2
put_hevc_qpel_bi_hv16_8_c: 4709.4
put_hevc_qpel_bi_hv16_8_i8mm: 929.9
put_hevc_qpel_bi_hv24_8_c: 9639.7
put_hevc_qpel_bi_hv24_8_i8mm: 2072.4
put_hevc_qpel_bi_hv32_8_c: 16663.7
put_hevc_qpel_bi_hv32_8_i8mm: 3391.4
put_hevc_qpel_bi_hv48_8_c: 36972.9
put_hevc_qpel_bi_hv48_8_i8mm: 7505.7
put_hevc_qpel_bi_hv64_8_c: 64106.4
put_hevc_qpel_bi_hv64_8_i8mm: 13145.2

Co-Authored-By: J. Dekker 
Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fa0470347e326fe1c9f54ab3dcdbdfa67fa5eddd
---

 libavcodec/aarch64/hevcdsp_init_aarch64.c |   5 +
 libavcodec/aarch64/hevcdsp_qpel_neon.S| 299 ++
 2 files changed, 304 insertions(+)

diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c 
b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index 9552549897..687b6cc5c3 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -255,6 +255,10 @@ NEON8_FNPROTO(qpel_bi_v, (uint8_t *dst, ptrdiff_t 
dststride,
 const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
 int height, intptr_t mx, intptr_t my, int width),);
 
+NEON8_FNPROTO(qpel_bi_hv, (uint8_t *dst, ptrdiff_t dststride,
+const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
+int height, intptr_t mx, intptr_t my, int width), _i8mm);
+
 #define NEON8_FNASSIGN(member, v, h, fn, ext) \
 member[1][v][h] = ff_hevc_put_hevc_##fn##4_8_neon##ext;  \
 member[2][v][h] = ff_hevc_put_hevc_##fn##6_8_neon##ext;  \
@@ -370,6 +374,7 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, 
const int bit_depth)
 NEON8_FNASSIGN(c->put_hevc_qpel_uni_w, 0, 1, qpel_uni_w_h, _i8mm);
 NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 1, 1, epel_uni_w_hv, _i8mm);
 NEON8_FNASSIGN_PARTIAL_5(c->put_hevc_qpel_uni_w, 1, 1, 
qpel_uni_w_hv, _i8mm);
+NEON8_FNASSIGN(c->put_hevc_qpel_bi, 1, 1, qpel_bi_hv, _i8mm);
 }
 
 }
diff --git a/libavcodec/aarch64/hevcdsp_qpel_neon.S 
b/libavcodec/aarch64/hevcdsp_qpel_neon.S
index d01dd24a78..9be29cafe2 100644
--- a/libavcodec/aarch64/hevcdsp_qpel_neon.S
+++ b/libavcodec/aarch64/hevcdsp_qpel_neon.S
@@ -4200,5 +4200,304 @@ function ff_hevc_put_hevc_qpel_uni_w_hv64_8_neon_i8mm, 
export=1
 ret
 endfunc
 
+function ff_hevc_put_hevc_qpel_bi_hv4_8_neon_i8mm, export=1
+add w10, w5, #7
+lsl x10, x10, #7
+sub sp, sp, x10 // tmp_array
+stp x7, x30, [sp, #-48]!
+stp x4, x5, [sp, #16]
+stp x0, x1, [sp, #32]
+sub x1, x2, x3, lsl #1
+sub x1, x1, x3
+add x0, sp, #48
+mov x2, x3
+add w3, w5, #7
+mov x4, x6
+bl  X(ff_hevc_put_hevc_qpel_h4_8_neon_i8mm)
+ldp x4, x5, [sp, #16]
+ldp x0, x1, [sp, #32]
+ldp x7, x30, [sp], #48
+mov x9, #(MAX_PB_SIZE * 2)
+load_qpel_filterh x7, x6
+ld1 {v16.4h}, [sp], x9
+ld1 {v17.4h}, [sp], x9
+ld1 {v18.4h}, [sp], x9
+ld1 {v19.4h}, [sp], x9
+ld1 {v20.4h}, [sp], x9
+ld1 {v21.4h}, [sp], x9
+ld1 {v22.4h}, [sp], x9
+.macro calc tmp, src0, src1, src2, src3, src4, src5, src6, src7
+ld1 {\tmp\().4h}, [sp], x9
+calc_qpelh  v1, \src0, \src1, \src2, \src3, \src4, \src5, \src6, 
\src7, sshr
+ld1 {v5.4h}, [x4], x9 // src2
+saddw   v1.4s, v1.4s, v5.4h
+rshrn   v1.4h, v1.4s, #7
+sqxtun  v1.8b, v1.8h
+subsw5, w5, #1
+st1 {v1.s}[0], [x0], x1
+.endm
+1:  calc_all
+.purgem calc
+2:  ret
+endfunc
+
+function ff_hevc_put_hevc_qpel_bi_hv6_8_neon_i8mm, export=1
+add w10, w5, #7
+lsl x10, x10, #7
+sub sp, sp, x10 // tmp_array
+stp x7, x30, [sp, #-48]!
+stp x4, x5, [sp, #16]
+stp x0, x1, [sp, #32]
+sub x1, x2, x3, lsl #1
+sub x1, x1, x3
+add x0, sp, #48
+mov x2, x3
+add x3, x5, #7
+mov x4, x6
+bl  X(f

[FFmpeg-cvslog] avfilter/vf_chromanr: compare correct variables for advanced mode

2023-12-01 Thread Paul B Mahol
ffmpeg | branch: master | Paul B Mahol  | Fri Dec  1 21:11:57 
2023 +0100| [db7b8382376e6b49cfc44583036759be59156f22] | committer: Paul B Mahol

avfilter/vf_chromanr: compare correct variables for advanced mode

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=db7b8382376e6b49cfc44583036759be59156f22
---

 libavfilter/vf_chromanr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavfilter/vf_chromanr.c b/libavfilter/vf_chromanr.c
index dd49d8670a..6f969f981c 100644
--- a/libavfilter/vf_chromanr.c
+++ b/libavfilter/vf_chromanr.c
@@ -158,7 +158,7 @@ static int distance ## _slice##name(AVFilterContext *ctx, 
void *arg,
 su += U;   
\
 sv += V;   
\
 cn++;  
\
-} else if (fun(cyY, cuU, cvV) < thres) {   
\
+} else if (!extra && fun(cyY, cuU, cvV) < thres) { 
\
 su += U;   
\
 sv += V;   
\
 cn++;  
\
@@ -210,7 +210,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 s->thres_u = s->threshold_u * (1 << (s->depth - 8));
 s->thres_v = s->threshold_v * (1 << (s->depth - 8));
 
-if (s->thres_y < 200.f || s->thres_u < 200.f || s->thres_v < 200.f) {
+if (s->threshold_y < 200.f || s->threshold_u < 200.f || s->threshold_v < 
200.f) {
 switch (s->distance) {
 case 0:
 s->filter_slice = s->depth <= 8 ? manhattan_e_slice8 : 
manhattan_e_slice16;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avformat/mov: Fix integer overflow in mov_read_packet().

2023-12-01 Thread Dale Curtis
ffmpeg | branch: master | Dale Curtis  | Wed Nov 22 
22:17:37 2023 +0000| [2182173a6933c02b0853751034bd5e0bf829b5f7] | committer: 
Michael Niedermayer

avformat/mov: Fix integer overflow in mov_read_packet().

Fixes https://crbug.com/1499669:
runtime error: signed integer overflow: 9223372036853334272 + 1375731456
cannot be represented in type 'int64_t' (aka 'long')

Signed-off-by: Dale Curtis 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2182173a6933c02b0853751034bd5e0bf829b5f7
---

 libavformat/mov.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 34ca8095c2..f7b5ec7a35 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -9006,7 +9006,7 @@ static int mov_read_packet(AVFormatContext *s, AVPacket 
*pkt)
 pkt->flags |= AV_PKT_FLAG_DISCARD;
 }
 if (sc->ctts_data && sc->ctts_index < sc->ctts_count) {
-pkt->pts = pkt->dts + sc->dts_shift + 
sc->ctts_data[sc->ctts_index].duration;
+pkt->pts = av_sat_add64(pkt->dts, av_sat_add64(sc->dts_shift, 
sc->ctts_data[sc->ctts_index].duration));
 /* update ctts context */
 sc->ctts_sample++;
 if (sc->ctts_index < sc->ctts_count &&

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".