On 2018-01-16 22:26, Martin Vignali wrote:
> diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
> index d7cd996842..9db2d90e57 100644
> --- a/libavutil/x86/x86util.asm
> +++ b/libavutil/x86/x86util.asm
> @@ -335,7 +335,7 @@
>  %endmacro
>  
>  %macro ABS2 4
> -%if cpuflag(ssse3)
> +%if cpuflag(ssse3)||cpuflag(avx2)
>      pabsw   %1, %1
>      pabsw   %2, %2
>  %elif cpuflag(mmxext) ; a, b, tmp0, tmp1

Why?  AVX2 implies all earlier flags, so cpuflag(ssse3) is already true here.

> +;%1 dst, %2 src %3 xm fill by zero (only use in SSE2)
> +%macro PMOVZXBW 3
> +%if cpuflag(avx2)
> +    vpmovzxbw %1, %2
> +%else; SSE2
> +     movh      %1, %2
> +     punpcklbw %1, %3
> +%endif
> +%endmacro

Are you aware that SSE4.1 added the packed move sign/zero extend
instructions?  I don't suggest that you make an SSE4 version, but if you
use many 3-operand instructions an AVX version might be worthwhile.

> @@ -85,4 +102,25 @@ av_cold void ff_blend_init_x86(FilterParams *param, int 
> is_16bit)
>          case BLEND_NEGATION:   param->blend = ff_blend_negation_ssse3;   
> break;
>          }
>      }
> +    if (EXTERNAL_AVX2_FAST(cpu_flags) && param->opacity == 1 && !is_16bit) {
> +        switch (param->mode) {
> +        case BLEND_ADDITION: param->blend = ff_blend_addition_avx2; break;
> +        case BLEND_GRAINMERGE: param->blend = ff_blend_grainmerge_avx2; 
> break;
> +        case BLEND_AND:      param->blend = ff_blend_and_avx2;      break;
> +        case BLEND_AVERAGE:  param->blend = ff_blend_average_avx2;  break;
> +        case BLEND_DARKEN:   param->blend = ff_blend_darken_avx2;   break;
> +        case BLEND_GRAINEXTRACT: param->blend = ff_blend_grainextract_avx2; 
> break;
> +        case BLEND_HARDMIX:  param->blend = ff_blend_hardmix_avx2;  break;
> +        case BLEND_LIGHTEN:  param->blend = ff_blend_lighten_avx2;  break;
> +        case BLEND_MULTIPLY: param->blend = ff_blend_multiply_avx2; break;
> +        case BLEND_OR:       param->blend = ff_blend_or_avx2;       break;
> +        case BLEND_PHOENIX:  param->blend = ff_blend_phoenix_avx2;  break;
> +        case BLEND_SCREEN:   param->blend = ff_blend_screen_avx2;   break;
> +        case BLEND_SUBTRACT: param->blend = ff_blend_subtract_avx2; break;
> +        case BLEND_XOR:      param->blend = ff_blend_xor_avx2;      break;
> +        case BLEND_DIFFERENCE: param->blend = ff_blend_difference_avx2; 
> break;
> +        case BLEND_EXTREMITY:  param->blend = ff_blend_extremity_avx2;  
> break;
> +        case BLEND_NEGATION:   param->blend = ff_blend_negation_avx2;   
> break;
> +        }
> +    }
>  }

If you're going to align things vertically then do it for every line.


Attachment: signature.asc
Description: OpenPGP digital signature

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to