From: Henrik Gramner <hen...@gramner.com> Most VEX-encoded instructions require an additional byte to encode when src2 is a high register (e.g. x|ymm8..15). If the instruction is commutative we can swap src1 and src2 when doing so reduces the instruction length, e.g.
vpaddw xmm0, xmm0, xmm8 -> vpaddw xmm0, xmm8, xmm0 --- libavutil/x86/x86inc.asm | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index bc370a6186..39cba5db09 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -1244,9 +1244,40 @@ INIT_XMM %elif %0 >= 9 __instr %6, %7, %8, %9 %elif %0 == 8 - __instr %6, %7, %8 + %if avx_enabled && %5 + %xdefine __src1 %7 + %xdefine __src2 %8 + %ifnum regnumof%7 + %ifnum regnumof%8 + %if regnumof%7 < 8 && regnumof%8 >= 8 && regnumof%8 < 16 && sizeof%8 <= 32 + ; Most VEX-encoded instructions require an additional byte to encode when + ; src2 is a high register (e.g. m8..15). If the instruction is commutative + ; we can swap src1 and src2 when doing so reduces the instruction length. + %xdefine __src1 %8 + %xdefine __src2 %7 + %endif + %endif + %endif + __instr %6, __src1, __src2 + %else + __instr %6, %7, %8 + %endif %elif %0 == 7 - __instr %6, %7 + %if avx_enabled && %5 + %xdefine __src1 %6 + %xdefine __src2 %7 + %ifnum regnumof%6 + %ifnum regnumof%7 + %if regnumof%6 < 8 && regnumof%7 >= 8 && regnumof%7 < 16 && sizeof%7 <= 32 + %xdefine __src1 %7 + %xdefine __src2 %6 + %endif + %endif + %endif + __instr %6, __src1, __src2 + %else + __instr %6, %7 + %endif %else __instr %6 %endif -- 2.22.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".