Broadwell and later CPUs have fast gather instructions. --- This allows the AVX2 version of ff_hscale8to15X, which uses gather instructions, to be selected only on machines where it will actually be faster. libavutil/cpu.c | 6 ++++++ libavutil/cpu.h | 6 ++++++ libavutil/cpu_internal.h | 1 + libavutil/x86/cpu.c | 18 ++++++++++++++++++ 4 files changed, 31 insertions(+)
diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 8960415d00..0a723eeb7a 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -49,6 +49,12 @@ static atomic_int cpu_flags = ATOMIC_VAR_INIT(-1); +int av_cpu_has_fast_gather(void){ + if (ARCH_X86) + return ff_cpu_has_fast_gather(); + return 0; +} + static int get_cpu_flags(void) { if (ARCH_MIPS) diff --git a/libavutil/cpu.h b/libavutil/cpu.h index b555422dae..faf3a221f4 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -72,6 +72,7 @@ #define AV_CPU_FLAG_MMI (1 << 0) #define AV_CPU_FLAG_MSA (1 << 1) +int av_cpu_has_fast_gather(void); /** * Return the flags which specify extensions supported by the CPU. * The returned value is affected by av_force_cpu_flags() if that was used @@ -107,6 +108,11 @@ int av_cpu_count(void); * av_set_cpu_flags_mask(), then this function will behave as if AVX is not * present. */ + +/** + * Returns true if the cpu has fast gather instructions. + * Broadwell and later cpus have fast gather + */ size_t av_cpu_max_align(void); #endif /* AVUTIL_CPU_H */ diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h index 889764320b..92525df0c1 100644 --- a/libavutil/cpu_internal.h +++ b/libavutil/cpu_internal.h @@ -46,6 +46,7 @@ int ff_get_cpu_flags_aarch64(void); int ff_get_cpu_flags_arm(void); int ff_get_cpu_flags_ppc(void); int ff_get_cpu_flags_x86(void); +int ff_cpu_has_fast_gather(void); size_t ff_get_cpu_max_align_mips(void); size_t ff_get_cpu_max_align_aarch64(void); diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index bcd41a50a2..9724e0017b 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -270,3 +270,21 @@ size_t ff_get_cpu_max_align_x86(void) return 8; } + +int ff_cpu_has_fast_gather(void){ + int eax, ebx, ecx; + int max_std_level, std_caps = 0; + int family = 0, model = 0; + cpuid(0, max_std_level, ebx, ecx, std_caps); + + if (max_std_level >= 1) { + cpuid(1, eax, ebx, ecx, std_caps); + family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); + model = ((eax >> 
4) & 0xf) + ((eax >> 12) & 0xf0); + // Broadwell and later + if(family == 6 && model >= 70){ + return 1; + } + } + return 0; +} -- 2.32.0.272.g935e593368-goog _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".