Broadwell and later have fast gather instructions.
---
 This ensures that the AVX2 version of ff_hscale8to15X, which uses gather
 instructions, is only selected on machines where it will actually be
 faster.
 libavutil/cpu.c          |  6 ++++++
 libavutil/cpu.h          |  6 ++++++
 libavutil/cpu_internal.h |  1 +
 libavutil/x86/cpu.c      | 18 ++++++++++++++++++
 4 files changed, 31 insertions(+)

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 8960415d00..0a723eeb7a 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -49,6 +49,12 @@
 
 static atomic_int cpu_flags = ATOMIC_VAR_INIT(-1);
 
+/* Report fast gather support; only the x86 backend provides a probe. */
+int av_cpu_has_fast_gather(void)
+{
+    return ARCH_X86 ? ff_cpu_has_fast_gather() : 0;
+}
+
 static int get_cpu_flags(void)
 {
     if (ARCH_MIPS)
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index b555422dae..faf3a221f4 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -72,6 +72,12 @@
 #define AV_CPU_FLAG_MMI          (1 << 0)
 #define AV_CPU_FLAG_MSA          (1 << 1)
 
+/**
+ * Check whether the CPU has fast gather instructions.
+ * @return 1 if it does (Broadwell and later CPUs), 0 otherwise
+ */
+int av_cpu_has_fast_gather(void);
+
 /**
  * Return the flags which specify extensions supported by the CPU.
  * The returned value is affected by av_force_cpu_flags() if that was used
diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
index 889764320b..92525df0c1 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/cpu_internal.h
@@ -46,6 +46,7 @@ int ff_get_cpu_flags_aarch64(void);
 int ff_get_cpu_flags_arm(void);
 int ff_get_cpu_flags_ppc(void);
 int ff_get_cpu_flags_x86(void);
+int ff_cpu_has_fast_gather(void);
 
 size_t ff_get_cpu_max_align_mips(void);
 size_t ff_get_cpu_max_align_aarch64(void);
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index bcd41a50a2..9724e0017b 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -270,3 +270,21 @@ size_t ff_get_cpu_max_align_x86(void)
 
     return 8;
 }
+
+/* Return 1 on family 6 CPUs whose model is Broadwell or newer, 0 otherwise. */
+int ff_cpu_has_fast_gather(void)
+{
+    int eax, ebx, ecx, edx;
+    int max_std_level, family, model;
+
+    cpuid(0, max_std_level, ebx, ecx, edx);
+    if (max_std_level < 1)
+        return 0;
+
+    cpuid(1, eax, ebx, ecx, edx);
+    family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+    model  = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
+    /* Model numbers are not ordered by generation: the first Broadwell is 0x3D,
+     * while 0x46 is still Haswell; all other Broadwell+ models are >= 0x47. */
+    return family == 6 && (model == 0x3D || model >= 0x47);
+}
-- 
2.32.0.272.g935e593368-goog

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to