On Sun, May 7, 2017 at 11:05 AM, Matthieu Bouron <matthieu.bou...@gmail.com> wrote:
> > > Le 2 mai 2017 12:01 PM, "Benoit Fouet" <benoit.fo...@free.fr> a écrit : > > Hi, > > > On 28/04/2017 21:58, Matthieu Bouron wrote: > > Untested: fixes ticket #6324. > > --- > > libavcodec/aarch64/simple_idct_neon.S | 12 ++++++------ > > 1 file changed, 6 insertions(+), 6 deletions(-) > > > > diff --git a/libavcodec/aarch64/simple_idct_neon.S > b/libavcodec/aarch64/simple_idct_neon.S > > index 52273420f9..d31f72a609 100644 > > --- a/libavcodec/aarch64/simple_idct_neon.S > > +++ b/libavcodec/aarch64/simple_idct_neon.S > > @@ -61,19 +61,19 @@ endconst > > br x10 > > .endm > > > > -.macro smull1 a b c > > +.macro smull1 a, b, c > > smull \a, \b, \c > > .endm > > > > -.macro smlal1 a b c > > +.macro smlal1 a, b, c > > smlal \a, \b, \c > > .endm > > > > -.macro smlsl1 a b c > > +.macro smlsl1 a, b, c > > smlsl \a, \b, \c > > .endm > > > > -.macro idct_col4_top y1 y2 y3 y4 i l > > +.macro idct_col4_top y1, y2, y3, y4, i, l > > smull\i v7.4S, \y3\().\l, z2 > > smull\i v16.4S, \y3\().\l, z6 > > smull\i v17.4S, \y2\().\l, z1 > > @@ -91,7 +91,7 @@ endconst > > smlsl\i v6.4S, \y4\().\l, z5 > > .endm > > > > -.macro idct_row4_neon y1 y2 y3 y4 pass > > +.macro idct_row4_neon y1, y2, y3, y4, pass > > ld1 {\y1\().2D-\y2\().2D}, [x2], #32 > > movi v23.4S, #1<<2, lsl #8 > > orr v5.16B, \y1\().16B, \y2\().16B > > @@ -153,7 +153,7 @@ endconst > > trn2 \y4\().4S, v17.4S, v19.4S > > .endm > > > > -.macro declare_idct_col4_neon i l > > +.macro declare_idct_col4_neon i, l > > function idct_col4_neon\i > > dup v23.4H, z4c > > .if \i == 1 > > Sounds sane, but shouldn't we be doing this for all instances of > multiple arguments macros without commas? > > > Sure, I may have missed some. I will work again on this patch on Tuesday > as I will have access to an apple machine (and hopefully fix the build > without gas-preprocessor). 
> > Sorry for the delay, > Matthieu > > Updated patch attached: * add missing commas to separate macro arguments * passes .4H/.8H as macro arguments instead of 4H/8H (the latter form being interpreted as a hexadecimal value, i.e. 4/8).
From e27ac0f3a8b6436a7530ee5c5c514bfdfac4a558 Mon Sep 17 00:00:00 2001 From: Matthieu Bouron <matthieu.bou...@gmail.com> Date: Fri, 28 Apr 2017 21:58:55 +0200 Subject: [PATCH] lavc/aarch64/simple_idct: fix iOS build without gas-preprocessor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separates macro arguments with commas and passes .4H/.8H as macro arguments instead of 4H/8H (the latter form being interpreted as a hexadecimal value). Fixes ticket #6324. Suggested-by: Martin Storsjö <mar...@martin.st> --- libavcodec/aarch64/simple_idct_neon.S | 74 +++++++++++++++++------------------ 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/libavcodec/aarch64/simple_idct_neon.S b/libavcodec/aarch64/simple_idct_neon.S index 52273420f9..92987985d2 100644 --- a/libavcodec/aarch64/simple_idct_neon.S +++ b/libavcodec/aarch64/simple_idct_neon.S @@ -61,37 +61,37 @@ endconst br x10 .endm -.macro smull1 a b c +.macro smull1 a, b, c smull \a, \b, \c .endm -.macro smlal1 a b c +.macro smlal1 a, b, c smlal \a, \b, \c .endm -.macro smlsl1 a b c +.macro smlsl1 a, b, c smlsl \a, \b, \c .endm -.macro idct_col4_top y1 y2 y3 y4 i l - smull\i v7.4S, \y3\().\l, z2 - smull\i v16.4S, \y3\().\l, z6 - smull\i v17.4S, \y2\().\l, z1 +.macro idct_col4_top y1, y2, y3, y4, i, l + smull\i v7.4S, \y3\l, z2 + smull\i v16.4S, \y3\l, z6 + smull\i v17.4S, \y2\l, z1 add v19.4S, v23.4S, v7.4S - smull\i v18.4S, \y2\().\l, z3 + smull\i v18.4S, \y2\l, z3 add v20.4S, v23.4S, v16.4S - smull\i v5.4S, \y2\().\l, z5 + smull\i v5.4S, \y2\l, z5 sub v21.4S, v23.4S, v16.4S - smull\i v6.4S, \y2\().\l, z7 + smull\i v6.4S, \y2\l, z7 sub v22.4S, v23.4S, v7.4S - smlal\i v17.4S, \y4\().\l, z3 - smlsl\i v18.4S, \y4\().\l, z7 - smlsl\i v5.4S, \y4\().\l, z1 - smlsl\i v6.4S, \y4\().\l, z5 + smlal\i v17.4S, \y4\l, z3 + smlsl\i v18.4S, \y4\l, z7 + smlsl\i v5.4S, \y4\l, z1 + smlsl\i v6.4S, \y4\l, z5 .endm -.macro idct_row4_neon y1 y2 y3 y4 pass +.macro idct_row4_neon y1, y2, 
y3, y4, pass ld1 {\y1\().2D-\y2\().2D}, [x2], #32 movi v23.4S, #1<<2, lsl #8 orr v5.16B, \y1\().16B, \y2\().16B @@ -101,7 +101,7 @@ endconst mov x3, v5.D[1] smlal v23.4S, \y1\().4H, z4 - idct_col4_top \y1 \y2 \y3 \y4 1 4H + idct_col4_top \y1, \y2, \y3, \y4, 1, .4H cmp x3, #0 beq \pass\()f @@ -153,7 +153,7 @@ endconst trn2 \y4\().4S, v17.4S, v19.4S .endm -.macro declare_idct_col4_neon i l +.macro declare_idct_col4_neon i, l function idct_col4_neon\i dup v23.4H, z4c .if \i == 1 @@ -164,14 +164,14 @@ function idct_col4_neon\i .endif smull v23.4S, v23.4H, z4 - idct_col4_top v24 v25 v26 v27 \i \l + idct_col4_top v24, v25, v26, v27, \i, \l mov x4, v28.D[\i - 1] mov x5, v29.D[\i - 1] cmp x4, #0 beq 1f - smull\i v7.4S, v28.\l, z4 + smull\i v7.4S, v28\l, z4 add v19.4S, v19.4S, v7.4S sub v20.4S, v20.4S, v7.4S sub v21.4S, v21.4S, v7.4S @@ -181,17 +181,17 @@ function idct_col4_neon\i cmp x5, #0 beq 2f - smlal\i v17.4S, v29.\l, z5 - smlsl\i v18.4S, v29.\l, z1 - smlal\i v5.4S, v29.\l, z7 - smlal\i v6.4S, v29.\l, z3 + smlal\i v17.4S, v29\l, z5 + smlsl\i v18.4S, v29\l, z1 + smlal\i v5.4S, v29\l, z7 + smlal\i v6.4S, v29\l, z3 2: mov x5, v31.D[\i - 1] cmp x4, #0 beq 3f - smull\i v7.4S, v30.\l, z6 - smull\i v16.4S, v30.\l, z2 + smull\i v7.4S, v30\l, z6 + smull\i v16.4S, v30\l, z2 add v19.4S, v19.4S, v7.4S sub v22.4S, v22.4S, v7.4S sub v20.4S, v20.4S, v16.4S @@ -200,10 +200,10 @@ function idct_col4_neon\i 3: cmp x5, #0 beq 4f - smlal\i v17.4S, v31.\l, z7 - smlsl\i v18.4S, v31.\l, z5 - smlal\i v5.4S, v31.\l, z3 - smlsl\i v6.4S, v31.\l, z1 + smlal\i v17.4S, v31\l, z7 + smlsl\i v18.4S, v31\l, z5 + smlal\i v5.4S, v31\l, z3 + smlsl\i v6.4S, v31\l, z1 4: addhn v7.4H, v19.4S, v17.4S addhn2 v7.8H, v20.4S, v18.4S @@ -219,14 +219,14 @@ function idct_col4_neon\i endfunc .endm -declare_idct_col4_neon 1 4H -declare_idct_col4_neon 2 8H +declare_idct_col4_neon 1, .4H +declare_idct_col4_neon 2, .8H function ff_simple_idct_put_neon, export=1 idct_start x2 - idct_row4_neon v24 v25 v26 v27 1 - 
idct_row4_neon v28 v29 v30 v31 2 + idct_row4_neon v24, v25, v26, v27, 1 + idct_row4_neon v28, v29, v30, v31, 2 bl idct_col4_neon1 sqshrun v1.8B, v7.8H, #COL_SHIFT-16 @@ -263,8 +263,8 @@ endfunc function ff_simple_idct_add_neon, export=1 idct_start x2 - idct_row4_neon v24 v25 v26 v27 1 - idct_row4_neon v28 v29 v30 v31 2 + idct_row4_neon v24, v25, v26, v27, 1 + idct_row4_neon v28, v29, v30, v31, 2 bl idct_col4_neon1 sshr v1.8H, V7.8H, #COL_SHIFT-16 @@ -328,8 +328,8 @@ function ff_simple_idct_neon, export=1 idct_start x0 mov x2, x0 - idct_row4_neon v24 v25 v26 v27 1 - idct_row4_neon v28 v29 v30 v31 2 + idct_row4_neon v24, v25, v26, v27, 1 + idct_row4_neon v28, v29, v30, v31, 2 add x2, x2, #-128 bl idct_col4_neon1 -- 2.12.0
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel