On 10/12/2015 12:20 AM, Rodger Combs wrote: > --- > libavutil/aes.c | 2 ++ > libavutil/aes_internal.h | 2 ++ > libavutil/x86/Makefile | 4 ++- > libavutil/x86/aes.asm | 85 > ++++++++++++++++++++++++++++++++++++++++++++++++ > libavutil/x86/aes_init.c | 42 ++++++++++++++++++++++++ > 5 files changed, 134 insertions(+), 1 deletion(-) > create mode 100644 libavutil/x86/aes.asm > create mode 100644 libavutil/x86/aes_init.c > > diff --git a/libavutil/aes.c b/libavutil/aes.c > index c917706..61ab8f7 100644 > --- a/libavutil/aes.c > +++ b/libavutil/aes.c > @@ -200,6 +200,8 @@ int av_aes_init(AVAES *a, const uint8_t *key, int > key_bits, int decrypt) > uint8_t alog8[512]; > > a->crypt = decrypt ? aes_decrypt : aes_encrypt; > + if (ARCH_X86) > + ff_init_aes_x86(a, decrypt); > > if > (!enc_multbl[FF_ARRAY_ELEMS(enc_multbl)-1][FF_ARRAY_ELEMS(enc_multbl[0])-1]) { > j = 1; > diff --git a/libavutil/aes_internal.h b/libavutil/aes_internal.h > index 37b9568..2150085 100644 > --- a/libavutil/aes_internal.h > +++ b/libavutil/aes_internal.h > @@ -39,4 +39,6 @@ typedef struct AVAES { > void (*crypt)(struct AVAES *a, uint8_t *dst, const uint8_t *src, int > count, uint8_t *iv); > } AVAES; > > +void ff_init_aes_x86(AVAES *a, int decrypt); > + > #endif /* AVUTIL_AES_INTERNAL_H */ > diff --git a/libavutil/x86/Makefile b/libavutil/x86/Makefile > index eb70a62..4ac6219 100644 > --- a/libavutil/x86/Makefile > +++ b/libavutil/x86/Makefile > @@ -1,4 +1,5 @@ > -OBJS += x86/cpu.o \ > +OBJS += x86/aes_init.o \ > + x86/cpu.o \ > x86/float_dsp_init.o \ > x86/lls_init.o \ > > @@ -10,5 +11,6 @@ YASM-OBJS += x86/cpuid.o > \ > $(EMMS_OBJS__yes_) \ > x86/float_dsp.o \ > x86/lls.o \ > + x86/aes.o \ > > YASM-OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils.o \ > diff --git a/libavutil/x86/aes.asm b/libavutil/x86/aes.asm > new file mode 100644 > index 0000000..7fb9130 > --- /dev/null > +++ b/libavutil/x86/aes.asm > @@ -0,0 +1,85 @@ > +;***************************************************************************** > 
+;* Copyright (c) 2015 Rodger Combs <rodger.co...@gmail.com> > +;* > +;* This file is part of FFmpeg. > +;* > +;* FFmpeg is free software; you can redistribute it and/or > +;* modify it under the terms of the GNU Lesser General Public > +;* License as published by the Free Software Foundation; either > +;* version 2.1 of the License, or (at your option) any later version. > +;* > +;* FFmpeg is distributed in the hope that it will be useful, > +;* but WITHOUT ANY WARRANTY; without even the implied warranty of > +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +;* Lesser General Public License for more details. > +;* > +;* You should have received a copy of the GNU Lesser General Public > +;* License along with FFmpeg; if not, write to the Free Software > +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > +;****************************************************************************** > + > +%include "x86util.asm" > + > +SECTION .text > + > +;----------------------------------------------------------------------------- > +; void ff_aes_decrypt(AVAES *a, uint8_t *dst, const uint8_t *src, int count, > uint8_t *iv) > +;----------------------------------------------------------------------------- > +%macro AES_CRYPT 1 > +%if %1 == 1 > +%define CRYPT aesdec > +%define LAST aesdeclast > +cglobal aes_decrypt, 5,6,2 > +%else > +%define CRYPT aesenc > +%define LAST aesenclast > +cglobal aes_encrypt, 5,6,2 > +%endif > + mov r3d, r3d
Why is this zero-extension needed? Just use r3d for the dec and test instructions below instead. > + pxor xm1, xm1 > + test r4, r4 > + je .block > + movdqu xm1, [r4] ; iv > +.block: > + mov r5d, [r0 + 17 * 16] > + imul r5, 16 > + movdqu xm0, [r2] ; state > +%if %1 == 0 > + pxor xm0, xm1 > +%endif > + pxor xm0, [r0 + r5] > +.round: > + sub r5, 16 > + CRYPT xm0, [r0 + r5] > + cmp r5, 16 > + jg .round > + LAST xm0, [r0] > +%if %1 == 1 > + pxor xm0, xm1 > + movdqu xm1, [r2] > +%endif > + movdqu [r1], xm0 > + dec r3 > + add r2, 16 > + add r1, 16 > + test r3, r3 > + jne .block > +%if %1 == 0 > + test r4, r4 > + je .ret > + movdqu [r4], xm0 > +.ret: > +%endif > + REP_RET > +%endmacro > + > +%if HAVE_AESNI_EXTERNAL > +INIT_XMM aesni > +AES_CRYPT 0 > +AES_CRYPT 1 > +%if HAVE_AVX_EXTERNAL > +INIT_XMM avx > +AES_CRYPT 0 > +AES_CRYPT 1 The AVX version is not really needed: you're not gaining anything by using the VEX coding scheme here. The aesni version is IMO enough. > +%endif > +%endif > diff --git a/libavutil/x86/aes_init.c b/libavutil/x86/aes_init.c > new file mode 100644 > index 0000000..250dbc8 > --- /dev/null > +++ b/libavutil/x86/aes_init.c > @@ -0,0 +1,42 @@ > +/* > + * Copyright (c) 2015 Rodger Combs <rodger.co...@gmail.com> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details.
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include <stddef.h> > +#include "libavutil/aes_internal.h" > +#include "libavutil/x86/cpu.h" > + > +void ff_aes_decrypt_aesni(AVAES *a, uint8_t *dst, const uint8_t *src, int > count, uint8_t *iv); > +void ff_aes_decrypt_avx(AVAES *a, uint8_t *dst, const uint8_t *src, int > count, uint8_t *iv); > + > +void ff_aes_encrypt_aesni(AVAES *a, uint8_t *dst, const uint8_t *src, int > count, uint8_t *iv); > +void ff_aes_encrypt_avx(AVAES *a, uint8_t *dst, const uint8_t *src, int > count, uint8_t *iv); > + > +av_cold void ff_init_aes_x86(AVAES *a, int decrypt) av_aes_init() is not av_cold, so the av_cold attribute is probably unneeded here. > +{ > +#if HAVE_YASM > + int cpu_flags = av_get_cpu_flags(); > + if (EXTERNAL_AESNI(cpu_flags)) { > + if (EXTERNAL_AVX(cpu_flags)) > + a->crypt = decrypt ? ff_aes_decrypt_avx : ff_aes_encrypt_avx; > + else > + a->crypt = decrypt ? ff_aes_decrypt_aesni : ff_aes_encrypt_aesni; > + } > +#endif /* HAVE_YASM */ > +} > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel