This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit cdb14bc74dfa4d7e274df9830418fc61da0796df
Author:     Georgii Zagoruiko <[email protected]>
AuthorDate: Mon Dec 8 19:45:32 2025 +0000
Commit:     Martin Storsjö <[email protected]>
CommitDate: Tue Dec 9 21:38:38 2025 +0000

    configure: add detection of assembler support for SME
    
    All changes were made during development/testing of SVE/SME for ffmpeg
    (vvc). Tested on Apple M4.
---
 Makefile                                   |  2 +-
 configure                                  |  8 +++++++-
 ffbuild/arch.mak                           |  1 +
 libavutil/aarch64/Makefile                 |  2 ++
 libavutil/aarch64/asm.S                    |  9 +++++++++
 libavutil/aarch64/cpu.c                    | 12 ++++++++++++
 libavutil/aarch64/cpu.h                    |  5 +++++
 libavutil/aarch64/{cpu_sve.S => cpu_sme.S} |  8 +++++---
 libavutil/cpu.c                            |  1 +
 libavutil/cpu.h                            |  1 +
 libavutil/tests/cpu.c                      |  8 +++++++-
 tests/checkasm/checkasm.c                  |  9 ++++++++-
 12 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index f3fed75954..f563a37fca 100644
--- a/Makefile
+++ b/Makefile
@@ -111,7 +111,7 @@ SUBDIR_VARS := CLEANFILES FFLIBS HOSTPROGS TESTPROGS TOOLS               \
                MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSP-OBJS MSA-OBJS         \
                MMI-OBJS LSX-OBJS LASX-OBJS RV-OBJS RVV-OBJS RVVB-OBJS    \
                OBJS SHLIBOBJS STLIBOBJS HOSTOBJS TESTOBJS SIMD128-OBJS   \
-               SVE-OBJS SVE2-OBJS
+               SVE-OBJS SVE2-OBJS SME-OBJS
 
 define RESET
 $(1) :=
diff --git a/configure b/configure
index 189e973501..15b33bb870 100755
--- a/configure
+++ b/configure
@@ -478,6 +478,7 @@ Optimization options (experts only):
   --disable-i8mm           disable I8MM optimizations
   --disable-sve            disable SVE optimizations
   --disable-sve2           disable SVE2 optimizations
+  --disable-sme            disable SME optimizations
   --disable-inline-asm     disable use of inline assembly
   --disable-x86asm         disable use of standalone x86 assembly
   --disable-mipsdsp        disable MIPS DSP ASE R1 optimizations
@@ -2224,6 +2225,7 @@ ARCH_EXT_LIST_ARM="
     setend
     sve
     sve2
+    sme
 "
 
 ARCH_EXT_LIST_MIPS="
@@ -2491,6 +2493,7 @@ TOOLCHAIN_FEATURES="
     as_archext_i8mm_directive
     as_archext_sve_directive
     as_archext_sve2_directive
+    as_archext_sme_directive
     as_dn_directive
     as_fpu_directive
     as_func
@@ -2823,6 +2826,7 @@ dotprod_deps="aarch64 neon"
 i8mm_deps="aarch64 neon"
 sve_deps="aarch64 neon"
 sve2_deps="aarch64 neon sve"
+sme_deps="aarch64 neon sve sve2"
 
 map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM
 
@@ -6447,11 +6451,12 @@ if enabled aarch64; then
     # internal assembler in clang 3.3 does not support this instruction
     enabled neon && check_insn neon 'ext   v0.8B, v0.8B, v1.8B, #1'
 
-    archext_list="dotprod i8mm sve sve2"
+    archext_list="dotprod i8mm sve sve2 sme"
     enabled dotprod && check_archext_insn dotprod 'udot v0.4s, v0.16b, v0.16b'
     enabled i8mm    && check_archext_insn i8mm    'usdot v0.4s, v0.16b, v0.16b'
     enabled sve     && check_archext_insn sve     'whilelt p0.s, x0, x1'
     enabled sve2    && check_archext_insn sve2    'sqrdmulh z0.s, z0.s, z0.s'
+    enabled sme     && check_archext_insn sme     'smstop'
 
     # Disable the main feature (e.g. HAVE_NEON) if neither inline nor external
     # assembly support the feature out of the box. Skip this for the features
@@ -8211,6 +8216,7 @@ if enabled aarch64; then
     echo "I8MM enabled              ${i8mm-no}"
     echo "SVE enabled               ${sve-no}"
     echo "SVE2 enabled              ${sve2-no}"
+    echo "SME enabled               ${sme-no}"
 fi
 if enabled arm; then
     echo "ARMv5TE enabled           ${armv5te-no}"
diff --git a/ffbuild/arch.mak b/ffbuild/arch.mak
index ec79ae7866..83d6bf276f 100644
--- a/ffbuild/arch.mak
+++ b/ffbuild/arch.mak
@@ -5,6 +5,7 @@ OBJS-$(HAVE_VFP)     += $(VFP-OBJS)     $(VFP-OBJS-yes)
 OBJS-$(HAVE_NEON)    += $(NEON-OBJS)    $(NEON-OBJS-yes)
 OBJS-$(HAVE_SVE)     += $(SVE-OBJS)     $(SVE-OBJS-yes)
 OBJS-$(HAVE_SVE2)    += $(SVE2-OBJS)    $(SVE2-OBJS-yes)
+OBJS-$(HAVE_SME)     += $(SME-OBJS)     $(SME-OBJS-yes)
 
 OBJS-$(HAVE_MIPSFPU)   += $(MIPSFPU-OBJS)    $(MIPSFPU-OBJS-yes)
 OBJS-$(HAVE_MIPSDSP)   += $(MIPSDSP-OBJS)    $(MIPSDSP-OBJS-yes)
diff --git a/libavutil/aarch64/Makefile b/libavutil/aarch64/Makefile
index 992e95e4df..744c2c53d7 100644
--- a/libavutil/aarch64/Makefile
+++ b/libavutil/aarch64/Makefile
@@ -6,3 +6,5 @@ NEON-OBJS += aarch64/float_dsp_neon.o                                 \
              aarch64/tx_float_neon.o                                  \
 
 SVE-OBJS += aarch64/cpu_sve.o                                         \
+
+SME-OBJS += aarch64/cpu_sme.o                                         \
diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S
index 2e4e451ec2..77cea57cfc 100644
--- a/libavutil/aarch64/asm.S
+++ b/libavutil/aarch64/asm.S
@@ -72,10 +72,19 @@
 #define DISABLE_SVE2
 #endif
 
+#if HAVE_AS_ARCHEXT_SME_DIRECTIVE
+#define ENABLE_SME   .arch_extension sme
+#define DISABLE_SME  .arch_extension nosme
+#else
+#define ENABLE_SME
+#define DISABLE_SME
+#endif
+
 DISABLE_DOTPROD
 DISABLE_I8MM
 DISABLE_SVE
 DISABLE_SVE2
+DISABLE_SME
 
 
 /* Support macros for
diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c
index e82c0f19ab..3394963303 100644
--- a/libavutil/aarch64/cpu.c
+++ b/libavutil/aarch64/cpu.c
@@ -28,6 +28,7 @@
 #define HWCAP_AARCH64_SVE     (1 << 22)
 #define HWCAP2_AARCH64_SVE2   (1 << 1)
 #define HWCAP2_AARCH64_I8MM   (1 << 13)
+#define HWCAP2_AARCH64_SME    (1 << 23)
 
 static int detect_flags(void)
 {
@@ -44,6 +45,8 @@ static int detect_flags(void)
         flags |= AV_CPU_FLAG_SVE2;
     if (hwcap2 & HWCAP2_AARCH64_I8MM)
         flags |= AV_CPU_FLAG_I8MM;
+    if (hwcap2 & HWCAP2_AARCH64_SME)
+        flags |= AV_CPU_FLAG_SME;
 
     return flags;
 }
@@ -67,6 +70,8 @@ static int detect_flags(void)
         flags |= AV_CPU_FLAG_DOTPROD;
     if (have_feature("hw.optional.arm.FEAT_I8MM"))
         flags |= AV_CPU_FLAG_I8MM;
+    if (have_feature("hw.optional.arm.FEAT_SME"))
+        flags |= AV_CPU_FLAG_SME;
 
     return flags;
 }
@@ -133,6 +138,10 @@ static int detect_flags(void)
 #ifdef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
     if (IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE))
         flags |= AV_CPU_FLAG_SVE2;
+#endif
+#ifdef PF_ARM_SME_INSTRUCTIONS_AVAILABLE
+    if (IsProcessorFeaturePresent(PF_ARM_SME_INSTRUCTIONS_AVAILABLE))
+        flags |= AV_CPU_FLAG_SME;
 #endif
     return flags;
 }
@@ -162,6 +171,9 @@ int ff_get_cpu_flags_aarch64(void)
 #ifdef __ARM_FEATURE_SVE2
     flags |= AV_CPU_FLAG_SVE2;
 #endif
+#ifdef __ARM_FEATURE_SME
+    flags |= AV_CPU_FLAG_SME;
+#endif
 
     flags |= detect_flags();
 
diff --git a/libavutil/aarch64/cpu.h b/libavutil/aarch64/cpu.h
index a41b729659..62d5eb768f 100644
--- a/libavutil/aarch64/cpu.h
+++ b/libavutil/aarch64/cpu.h
@@ -29,9 +29,14 @@
 #define have_i8mm(flags)    CPUEXT(flags, I8MM)
 #define have_sve(flags)     CPUEXT(flags, SVE)
 #define have_sve2(flags)    CPUEXT(flags, SVE2)
+#define have_sme(flags)     CPUEXT(flags, SME)
 
 #if HAVE_SVE
 int ff_aarch64_sve_length(void);
 #endif
 
+#if HAVE_SME
+int ff_aarch64_sme_length(void);
+#endif
+
 #endif /* AVUTIL_AARCH64_CPU_H */
diff --git a/libavutil/aarch64/cpu_sve.S b/libavutil/aarch64/cpu_sme.S
similarity index 87%
copy from libavutil/aarch64/cpu_sve.S
copy to libavutil/aarch64/cpu_sme.S
index d216ed2c49..ba79d483a1 100644
--- a/libavutil/aarch64/cpu_sve.S
+++ b/libavutil/aarch64/cpu_sme.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023 Martin Storsjo
+ * Copyright (c) 2025 Georgii Zagoruiko
  *
  * This file is part of FFmpeg.
  *
@@ -21,9 +21,11 @@
 #include "config.h"
 #include "asm.S"
 
-ENABLE_SVE
+ENABLE_SME
 
-function ff_aarch64_sve_length, export=1
+function ff_aarch64_sme_length, export=1
+        smstart
         cntb            x0
+        smstop
         ret
 endfunc
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 8f9b785ebc..5aed2f39dc 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -186,6 +186,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
         { "i8mm",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_I8MM     },    .unit = "flags" },
         { "sve",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SVE      },    .unit = "flags" },
         { "sve2",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SVE2     },    .unit = "flags" },
+        { "sme",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SME      },    .unit = "flags" },
 #elif ARCH_MIPS
         { "mmi",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI      },    .unit = "flags" },
         { "msa",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA      },    .unit = "flags" },
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index a06fc08e56..87cecd0424 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -76,6 +76,7 @@
 #define AV_CPU_FLAG_I8MM         (1 << 9)
 #define AV_CPU_FLAG_SVE          (1 <<10)
 #define AV_CPU_FLAG_SVE2         (1 <<11)
+#define AV_CPU_FLAG_SME          (1 <<12)
 #define AV_CPU_FLAG_SETEND       (1 <<16)
 
 #define AV_CPU_FLAG_MMI          (1 << 0)
diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c
index fd2e32901d..c63b7e7d53 100644
--- a/libavutil/tests/cpu.c
+++ b/libavutil/tests/cpu.c
@@ -48,6 +48,7 @@ static const struct {
     { AV_CPU_FLAG_I8MM,      "i8mm"       },
     { AV_CPU_FLAG_SVE,       "sve"        },
     { AV_CPU_FLAG_SVE2,      "sve2"       },
+    { AV_CPU_FLAG_SME,       "sme"        },
 #elif ARCH_ARM
     { AV_CPU_FLAG_ARMV5TE,   "armv5te"    },
     { AV_CPU_FLAG_ARMV6,     "armv6"      },
@@ -174,7 +175,12 @@ int main(int argc, char **argv)
 #if ARCH_AARCH64 && HAVE_SVE
     if (cpu_flags_raw & AV_CPU_FLAG_SVE)
         printf("sve_vector_length = %d\n", 8 * ff_aarch64_sve_length());
-#elif ARCH_RISCV && HAVE_RVV
+#endif
+#if ARCH_AARCH64 && HAVE_SME
+    if (cpu_flags_raw & AV_CPU_FLAG_SME)
+        printf("sme_vector_length = %d\n", 8 * ff_aarch64_sme_length());
+#endif
+#if ARCH_RISCV && HAVE_RVV
     if (cpu_flags_raw & AV_CPU_FLAG_RVV_I32) {
         size_t bytes = ff_get_rv_vlenb();
 
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 14faf71275..54665c2fad 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -362,6 +362,7 @@ static const struct {
     { "I8MM",     "i8mm",     AV_CPU_FLAG_I8MM },
     { "SVE",      "sve",      AV_CPU_FLAG_SVE },
     { "SVE2",     "sve2",     AV_CPU_FLAG_SVE2 },
+    { "SME",      "sme",      AV_CPU_FLAG_SME },
 #elif ARCH_ARM
     { "ARMV5TE",  "armv5te",  AV_CPU_FLAG_ARMV5TE },
     { "ARMV6",    "armv6",    AV_CPU_FLAG_ARMV6 },
@@ -1039,7 +1040,13 @@ int main(int argc, char *argv[])
     if (have_sve(av_get_cpu_flags()))
         snprintf(arch_info_buf, sizeof(arch_info_buf),
                  "SVE %d bits, ", 8 * ff_aarch64_sve_length());
-#elif ARCH_RISCV && HAVE_RVV
+#endif
+#if ARCH_AARCH64 && HAVE_SME
+    if (have_sme(av_get_cpu_flags()))
+        snprintf(arch_info_buf, sizeof(arch_info_buf),
+                 "SME %d bits, ", 8 * ff_aarch64_sme_length());
+#endif
+#if ARCH_RISCV && HAVE_RVV
     if (av_get_cpu_flags() & AV_CPU_FLAG_RVV_I32)
         snprintf(arch_info_buf, sizeof (arch_info_buf),
                  "%zu-bit vectors, ", 8 * ff_get_rv_vlenb());

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to