ffmpeg | branch: master | Martin Storsjö <mar...@martin.st> | Thu Jan 12 16:52:33 2017 +0200| [f0ecbb13cf1cf706a1350dad657219dc7b3c131e] | committer: Martin Storsjö
arm/aarch64: vp9lpf: Calculate !hev directly Previously we first calculated hev, and then negated it. Since we were able to schedule the negation in the middle of another calculation, we don't see a gain in every case. Before: Cortex A7 A8 A9 A53 A53/AArch64 vp9_loop_filter_v_4_8_neon: 147.0 129.0 115.8 89.0 88.7 vp9_loop_filter_v_8_8_neon: 242.0 198.5 174.7 140.0 136.7 vp9_loop_filter_v_16_8_neon: 500.0 419.5 382.7 293.0 275.7 vp9_loop_filter_v_16_16_neon: 971.2 825.5 731.5 579.0 453.0 After: vp9_loop_filter_v_4_8_neon: 143.0 127.7 114.8 88.0 87.7 vp9_loop_filter_v_8_8_neon: 241.0 197.2 173.7 140.0 136.7 vp9_loop_filter_v_16_8_neon: 497.0 419.5 379.7 293.0 275.7 vp9_loop_filter_v_16_16_neon: 965.2 818.7 731.4 579.0 452.0 This is cherrypicked from libav commit e1f9de86f454861b69b199ad801adc2ec6c3b220. Signed-off-by: Martin Storsjö <mar...@martin.st> > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f0ecbb13cf1cf706a1350dad657219dc7b3c131e --- libavcodec/aarch64/vp9lpf_neon.S | 5 ++--- libavcodec/arm/vp9lpf_neon.S | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S index 55e1964..7fe2c88 100644 --- a/libavcodec/aarch64/vp9lpf_neon.S +++ b/libavcodec/aarch64/vp9lpf_neon.S @@ -292,7 +292,7 @@ .if \mix != 0 sxtl v1.8h, v1.8b .endif - cmhi v5\sz, v5\sz, v3\sz // hev + cmhs v5\sz, v3\sz, v5\sz // !hev .if \wd == 8 // If a 4/8 or 8/4 mix is used, clear the relevant half of v6 .if \mix != 0 @@ -306,11 +306,10 @@ .elseif \wd == 8 bic v4\sz, v4\sz, v6\sz // fm && !flat8in .endif - mvn v5\sz, v5\sz // !hev + and v5\sz, v5\sz, v4\sz // !hev && fm && !flat8in .if \wd == 16 and v7\sz, v7\sz, v6\sz // flat8out && flat8in && fm .endif - and v5\sz, v5\sz, v4\sz // !hev && fm && !flat8in mul_sz \tmp3\().8h, \tmp4\().8h, \tmp3\().8h, \tmp4\().8h, \tmp5\().8h, \tmp5\().8h, \sz // 3 * (q0 - p0) bic \tmp1\sz, \tmp1\sz, v5\sz // if (!hev) av_clip_int8 = 0 diff --git 
a/libavcodec/arm/vp9lpf_neon.S b/libavcodec/arm/vp9lpf_neon.S index e96f4db..2761956 100644 --- a/libavcodec/arm/vp9lpf_neon.S +++ b/libavcodec/arm/vp9lpf_neon.S @@ -141,7 +141,7 @@ .if \wd == 8 vcle.u8 d6, d6, d0 @ flat8in .endif - vcgt.u8 d5, d5, d3 @ hev + vcle.u8 d5, d5, d3 @ !hev .if \wd == 8 vand d6, d6, d4 @ flat8in && fm .endif @@ -151,11 +151,10 @@ .elseif \wd == 8 vbic d4, d4, d6 @ fm && !flat8in .endif - vmvn d5, d5 @ !hev + vand d5, d5, d4 @ !hev && fm && !flat8in .if \wd == 16 vand d7, d7, d6 @ flat8out && flat8in && fm .endif - vand d5, d5, d4 @ !hev && fm && !flat8in vmul.s16 \tmpq2, \tmpq2, \tmpq3 @ 3 * (q0 - p0) vbic \tmp1, \tmp1, d5 @ if (!hev) av_clip_int8 = 0 _______________________________________________ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog