This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit dc03cffe9c9577127ef82b6f56118115f900e5f2 Author: Andreas Rheinhardt <[email protected]> AuthorDate: Sun Dec 7 11:46:41 2025 +0100 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Sun Jan 4 15:49:30 2026 +0100 avutil/crc: Use x86 clmul for CRC when available Observed near 10x speedup on AMD Zen4 7950x: av_crc_c: 22057.0 ( 1.00x) av_crc_clmul: 2202.8 (10.01x) Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavutil/crc.c | 24 +++++++++++++++++++ libavutil/x86/Makefile | 1 + libavutil/x86/crc.asm | 11 +++++---- libavutil/x86/crc.h | 62 +++++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 90 insertions(+), 8 deletions(-) diff --git a/libavutil/crc.c b/libavutil/crc.c index 34f507ead8..0b8d66d86d 100644 --- a/libavutil/crc.c +++ b/libavutil/crc.c @@ -25,6 +25,9 @@ #include "bswap.h" #include "crc.h" #include "error.h" +#if ARCH_X86 +#include "libavutil/x86/crc.h" +#endif #if CONFIG_HARDCODED_TABLES static const AVCRC av_crc_table[AV_CRC_MAX][257] = { @@ -348,6 +351,12 @@ int av_crc_init(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size) if (ctx_size != sizeof(AVCRC) * 257 && ctx_size != sizeof(AVCRC) * 1024) return AVERROR(EINVAL); +#if ARCH_X86 + int done = ff_crc_init_x86(ctx, le, bits, poly, ctx_size); + if (done) + return 0; +#endif + for (i = 0; i < 256; i++) { if (le) { for (c = i, j = 0; j < 8; j++) @@ -375,6 +384,14 @@ const AVCRC *av_crc_get_table(AVCRCId crc_id) { if ((unsigned)crc_id >= AV_CRC_MAX) return NULL; +// Check for arch-specific extensions first to avoid initializing +// ordinary CRC tables unnecessarily. +#if ARCH_X86 + const AVCRC *table = ff_crc_get_table_x86(crc_id); + if (table) + return table; +#endif + #if !CONFIG_HARDCODED_TABLES switch (crc_id) { case AV_CRC_8_ATM: CRC_INIT_TABLE_ONCE(AV_CRC_8_ATM); break; @@ -394,6 +411,13 @@ const AVCRC *av_crc_get_table(AVCRCId crc_id) uint32_t av_crc(const AVCRC *ctx, uint32_t crc, const uint8_t *buffer, size_t length) { + if (ctx[0]) { +#if ARCH_X86 + return ff_crc_x86(ctx, crc, buffer, length); +#endif + } + av_assert2(ctx[0] == 0); + const uint8_t *end = buffer + length; #if !CONFIG_SMALL diff --git a/libavutil/x86/Makefile b/libavutil/x86/Makefile index 4e1b4b1176..901298b6cb 100644 --- a/libavutil/x86/Makefile +++ b/libavutil/x86/Makefile @@ -4,6 +4,7 @@ EMMS_OBJS_$(HAVE_MMX_INLINE)_$(HAVE_MMX_EXTERNAL)_$(HAVE_MM_EMPTY) = x86/emms.o X86ASM-OBJS += x86/aes.o x86/aes_init.o \ x86/cpuid.o \ + x86/crc.o \ $(EMMS_OBJS__yes_) \ x86/fixed_dsp.o x86/fixed_dsp_init.o \ x86/float_dsp.o x86/float_dsp_init.o \ diff --git a/libavutil/x86/crc.asm b/libavutil/x86/crc.asm index 95cf90d250..4f5673fbd7 100644 --- a/libavutil/x86/crc.asm +++ b/libavutil/x86/crc.asm @@ -138,6 +138,7 @@ SECTION .text %endmacro %macro CRC 1 +%define CTX r0+4 ;----------------------------------------------------------------------------------------------- ; ff_crc[_le]_clmul(const uint8_t *ctx, uint32_t crc, const uint8_t *buffer, size_t length ;----------------------------------------------------------------------------------------------- @@ -177,7 +178,7 @@ cglobal crc, 4, 6, 7+4*ARCH_X86_64, 0x10 mov r4, 64 cmp r3, 128 jb .reduce_4x_to_1 - movu m4, [r0] + movu m4, [CTX] .fold_4x_loop: movu m6, [r2 + r4 + 0] @@ -200,7 +201,7 @@ cglobal crc, 4, 6, 7+4*ARCH_X86_64, 0x10 jbe .fold_4x_loop .reduce_4x_to_1: - movu m4, [r0 + 16] + movu m4, [CTX + 16] FOLD_SINGLE m5, m1, m4, m3 FOLD_SINGLE m5, m1, m4, m2 FOLD_SINGLE m5, m1, m4, m0 @@ -245,10 +246,10 @@ cglobal crc, 4, 6, 7+4*ARCH_X86_64, 0x10 FOLD_SINGLE m5, m1, m4, m2 .reduce_128_to_64: - movu m4, [r0 + 32] + movu m4, [CTX + 32] FOLD_128_TO_64 %1, m1, m4, m5 .reduce_64_to_32: - movu m4, [r0 + 48] + movu m4, [CTX + 48] FOLD_64_TO_32 %1, m1, m4, m5 RET @@ -261,7 +262,7 @@ cglobal crc, 4, 6, 7+4*ARCH_X86_64, 0x10 pshufb m1, m10 %endif mov r4, 16 - movu m4, [r0 + 16] + movu m4, [CTX + 16] jmp .fold_1x_pre .less_than_16bytes: diff --git a/libavutil/x86/crc.h b/libavutil/x86/crc.h index 5fabfa7570..c836c090c6 100644 --- a/libavutil/x86/crc.h +++ b/libavutil/x86/crc.h @@ -24,8 +24,10 @@ #include "config.h" #include "libavutil/attributes.h" #include "libavutil/attributes_internal.h" +#include "libavutil/avassert.h" #include "libavutil/cpu.h" #include "libavutil/crc.h" +#include "libavutil/intreadwrite.h" #include "libavutil/reverse.h" #include "libavutil/x86/cpu.h" @@ -37,50 +39,64 @@ uint32_t ff_crc_le_clmul(const AVCRC *ctx, uint32_t crc, const uint8_t *buffer, size_t length); FF_VISIBILITY_POP_HIDDEN -static const AVCRC crc_table_clmul[AV_CRC_MAX][16] = { +enum { + CRC_C = 0, + CLMUL_BE, + CLMUL_LE, +}; + +static const AVCRC crc_table_clmul[AV_CRC_MAX][17] = { [AV_CRC_8_ATM] = { + CLMUL_BE, 0x32000000, 0x0, 0xbc000000, 0x0, 0xc4000000, 0x0, 0x94000000, 0x0, 0x62000000, 0x0, 0x79000000, 0x0, 0x07156a16, 0x1, 0x07000000, 0x1, }, [AV_CRC_8_EBU] = { + CLMUL_BE, 0xb5000000, 0x0, 0xf3000000, 0x0, 0xfc000000, 0x0, 0x0d000000, 0x0, 0x6a000000, 0x0, 0x65000000, 0x0, 0x1c4b8192, 0x1, 0x1d000000, 0x1, }, [AV_CRC_16_ANSI] = { + CLMUL_BE, 0xf9e30000, 0x0, 0x807d0000, 0x0, 0xf9130000, 0x0, 0xff830000, 0x0, 0x807b0000, 0x0, 0x86630000, 0x0, 0xfffbffe7, 0x1, 0x80050000, 0x1, }, [AV_CRC_16_CCITT] = { + CLMUL_BE, 0x60190000, 0x0, 0x59b00000, 0x0, 0xd5f60000, 0x0, 0x45630000, 0x0, 0xaa510000, 0x0, 0xeb230000, 0x0, 0x11303471, 0x1, 0x10210000, 0x1, }, [AV_CRC_24_IEEE] = { + CLMUL_BE, 0x1f428700, 0x0, 0x467d2400, 0x0, 0x2c8c9d00, 0x0, 0x64e4d700, 0x0, 0xd9fe8c00, 0x0, 0xfd7e0c00, 0x0, 0xf845fe24, 0x1, 0x864cfb00, 0x1, }, [AV_CRC_32_IEEE] = { + CLMUL_BE, 0x8833794c, 0x0, 0xe6228b11, 0x0, 0xc5b9cd4c, 0x0, 0xe8a45605, 0x0, 0x490d678d, 0x0, 0xf200aa66, 0x0, 0x04d101df, 0x1, 0x04c11db7, 0x1, }, [AV_CRC_32_IEEE_LE] = { + CLMUL_LE, 0xc6e41596, 0x1, 0x54442bd4, 0x1, 0xccaa009e, 0x0, 0x751997d0, 0x1, 0xccaa009e, 0x0, 0x63cd6124, 0x1, 0xf7011640, 0x1, 0xdb710641, 0x1, }, [AV_CRC_16_ANSI_LE] = { + CLMUL_LE, 0x0000bffa, 0x0, 0x1b0c2, 0x0, 0x00018cc2, 0x0, 0x1d0c2, 0x0, 0x00018cc2, 0x0, 0x1bc02, 0x0, @@ -139,9 +155,10 @@ static inline void crc_init_x86(AVCRC *ctx, int le, int bits, uint32_t poly, int // convert to 32 degree polynomial poly_ = ((uint64_t)poly) << (32 - bits); - uint64_t x1, x2, x3, x4, x5, x6, x7, x8, div; - uint8_t *dst = (uint8_t*)ctx; + uint64_t div; + uint8_t *dst = (uint8_t*)(ctx + 1); if (le) { + ctx[0] = CLMUL_LE; AV_WN64(dst, xnmodp(4 * 128 - 32, poly_, 32, &div, le)); AV_WN64(dst + 8, xnmodp(4 * 128 + 32, poly_, 32, &div, le)); uint64_t tmp = xnmodp(128 - 32, poly_, 32, &div, le); @@ -152,6 +169,7 @@ static inline void crc_init_x86(AVCRC *ctx, int le, int bits, uint32_t poly, int AV_WN64(dst + 48, div); AV_WN64(dst + 56, reverse(poly_ | (1ULL << 32), 32)); } else { + ctx[0] = CLMUL_BE; AV_WN64(dst, xnmodp(4 * 128 + 64, poly_, 32, &div, le)); AV_WN64(dst + 8, xnmodp(4 * 128, poly_, 32, &div, le)); AV_WN64(dst + 16, xnmodp(128 + 64, poly_, 32, &div, le)); @@ -164,4 +182,42 @@ static inline void crc_init_x86(AVCRC *ctx, int le, int bits, uint32_t poly, int } #endif +static inline const AVCRC *ff_crc_get_table_x86(AVCRCId crc_id) +{ +#if HAVE_CLMUL_EXTERNAL + int cpu_flags = av_get_cpu_flags(); + + if (EXTERNAL_CLMUL(cpu_flags)) { + return crc_table_clmul[crc_id]; + } +#endif + return NULL; +} + +static inline av_cold int ff_crc_init_x86(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size) +{ +#if HAVE_CLMUL_EXTERNAL + int cpu_flags = av_get_cpu_flags(); + + if (EXTERNAL_CLMUL(cpu_flags)) { + crc_init_x86(ctx, le, bits, poly, ctx_size); + return 1; + } +#endif + return 0; +} + +static inline uint32_t ff_crc_x86(const AVCRC *ctx, uint32_t crc, + const uint8_t *buffer, size_t length) +{ + switch (ctx[0]) { +#if HAVE_CLMUL_EXTERNAL + case CLMUL_BE: return ff_crc_clmul(ctx, crc, buffer, length); + case CLMUL_LE: return ff_crc_le_clmul(ctx, crc, buffer, length); +#endif + default: av_unreachable("x86 CRC only uses CLMUL_BE and CLMUL_LE"); + } + return 0; +} + #endif /* AVUTIL_X86_CRC_H */ _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
