DavidSpickett created this revision.
Herald added subscribers: ctetreau, kristof.beyls.
Herald added a project: All.
DavidSpickett requested review of this revision.
Herald added subscribers: lldb-commits, alextsao1999.
Herald added a project: LLDB.

The Scalable Matrix Extension (SME) adds a new Scalable Vector mode
called "streaming SVE mode".

In this mode a lot of things change, but my understanding overall
is that this mode assumes you are not going to move data out of
the vector unit very often or read flags.

Based on "E1.3" of "Arm® Architecture Reference Manual Supplement,
The Scalable Matrix Extension (SME), for Armv9-A".

https://developer.arm.com/documentation/ddi0616/latest/

The important details for debug are that this adds another set
of SVE registers. This set is only active when we are in streaming
mode and is read from a new ptrace regset NT_ARM_SSVE.
We are able to read the header of either mode at all times but
only one will be active and contain register data.

SIMD registers must be read and written via the SVE regset when
in SSVE mode. Writing to them exits streaming mode.

"Note that when SME is present and streaming SVE mode is in use the
FPSIMD subset of registers will be read via NT_ARM_SVE and NT_ARM_SVE
writes will exit streaming mode in the target."

https://kernel.org/doc/html/v6.2/arm64/sve.html

The streaming mode registers do not have different names in the
architecture, so I do not plan to allow users to read or write the
inactive mode's registers. "z0" will always mean "z0" of the active
mode.

(ptrace does allow this but you would be reading invalid state,
and writing switches you into that mode which is probably not what
you want)

I've chosen to have 2 sets of state in the register context.
I did try reusing the same one, but it gets tricky to read SIMD
while in streaming mode.

Existing SVE tests have been updated to check streaming mode and
mode switches. However, we are limited in what we can check given
that state for the other mode is trashed on mode switch.

The only way to know what mode you are in for testing purposes would
be to execute a streaming-only or non-streaming-only instruction in
the opposite mode. However, the CPU feature smefa64 actually allows
all non-streaming mode instructions in streaming mode.

This is enabled by default in QEMU emulation and rather than mess
about trying to disable it I'm just going to use the pseudo streaming
control register added in a later patch to make the testing a bit
more robust.

A new test has been added to check SIMD read/write from all the modes.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D154926

Files:
  lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp
  lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h
  lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
  lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
  
lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py
  
lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/main.c
  
lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile
  
lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py
  
lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c
  lldb/test/API/commands/register/register/aarch64_sve_simd_registers/Makefile
  
lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py
  lldb/test/API/commands/register/register/aarch64_sve_simd_registers/main.c

Index: lldb/test/API/commands/register/register/aarch64_sve_simd_registers/main.c
===================================================================
--- /dev/null
+++ lldb/test/API/commands/register/register/aarch64_sve_simd_registers/main.c
@@ -0,0 +1,108 @@
+#include <stdint.h>
+#include <sys/prctl.h>
+
+void write_simd_regs() {
+#define WRITE_SIMD(NUM)                                                        \
+  asm volatile("MOV v" #NUM ".d[0], %0\n\t"                                    \
+               "MOV v" #NUM ".d[1], %0\n\t" ::"r"(NUM))
+
+  WRITE_SIMD(0);
+  WRITE_SIMD(1);
+  WRITE_SIMD(2);
+  WRITE_SIMD(3);
+  WRITE_SIMD(4);
+  WRITE_SIMD(5);
+  WRITE_SIMD(6);
+  WRITE_SIMD(7);
+  WRITE_SIMD(8);
+  WRITE_SIMD(9);
+  WRITE_SIMD(10);
+  WRITE_SIMD(11);
+  WRITE_SIMD(12);
+  WRITE_SIMD(13);
+  WRITE_SIMD(14);
+  WRITE_SIMD(15);
+  WRITE_SIMD(16);
+  WRITE_SIMD(17);
+  WRITE_SIMD(18);
+  WRITE_SIMD(19);
+  WRITE_SIMD(20);
+  WRITE_SIMD(21);
+  WRITE_SIMD(22);
+  WRITE_SIMD(23);
+  WRITE_SIMD(24);
+  WRITE_SIMD(25);
+  WRITE_SIMD(26);
+  WRITE_SIMD(27);
+  WRITE_SIMD(28);
+  WRITE_SIMD(29);
+  WRITE_SIMD(30);
+  WRITE_SIMD(31);
+}
+
+unsigned verify_simd_regs() {
+  uint64_t got_low = 0;
+  uint64_t got_high = 0;
+  uint64_t target = 0;
+
+#define VERIFY_SIMD(NUM)                                                       \
+  do {                                                                         \
+    got_low = 0;                                                               \
+    got_high = 0;                                                              \
+    asm volatile("MOV %0, v" #NUM ".d[0]\n\t"                                  \
+                 "MOV %1, v" #NUM ".d[1]\n\t"                                  \
+                 : "=r"(got_low), "=r"(got_high));                             \
+    target = NUM + 1;                                                          \
+    if ((got_low != target) || (got_high != target))                           \
+      return 1;                                                                \
+  } while (0)
+
+  VERIFY_SIMD(0);
+  VERIFY_SIMD(1);
+  VERIFY_SIMD(2);
+  VERIFY_SIMD(3);
+  VERIFY_SIMD(4);
+  VERIFY_SIMD(5);
+  VERIFY_SIMD(6);
+  VERIFY_SIMD(7);
+  VERIFY_SIMD(8);
+  VERIFY_SIMD(9);
+  VERIFY_SIMD(10);
+  VERIFY_SIMD(11);
+  VERIFY_SIMD(12);
+  VERIFY_SIMD(13);
+  VERIFY_SIMD(14);
+  VERIFY_SIMD(15);
+  VERIFY_SIMD(16);
+  VERIFY_SIMD(17);
+  VERIFY_SIMD(18);
+  VERIFY_SIMD(19);
+  VERIFY_SIMD(20);
+  VERIFY_SIMD(21);
+  VERIFY_SIMD(22);
+  VERIFY_SIMD(23);
+  VERIFY_SIMD(24);
+  VERIFY_SIMD(25);
+  VERIFY_SIMD(26);
+  VERIFY_SIMD(27);
+  VERIFY_SIMD(28);
+  VERIFY_SIMD(29);
+  VERIFY_SIMD(30);
+  VERIFY_SIMD(31);
+
+  return 0;
+}
+
+int main() {
+#ifdef SSVE
+  asm volatile("msr  s0_3_c4_c7_3, xzr" /*smstart*/);
+#elif defined SVE
+  // Make the non-streaming SVE registers active.
+  asm volatile("cpy  z0.b, p0/z, #1\n\t");
+#endif
+  // else test plain SIMD access.
+
+  write_simd_regs();
+
+  return verify_simd_regs(); // Set a break point here.
+}
Index: lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py
===================================================================
--- /dev/null
+++ lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py
@@ -0,0 +1,100 @@
+"""
+Test that LLDB correctly reads and writes AArch64 SIMD registers in SVE,
+streaming SVE and normal SIMD modes.
+
+In SIMD mode data comes from the SIMD regset but in SVE mode, it comes from
+the SVE regset. In streaming mode it also comes from the SVE regset.
+"""
+
+from enum import Enum
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+class Mode(Enum):
+    SIMD = 0
+    SVE = 1
+    SSVE = 2
+
+class SVESIMDRegistersTestCase(TestBase):
+    def get_build_flags(self, mode):
+        cflags = "-march=armv8-a+sve"
+        if mode == Mode.SSVE:
+            cflags += " -DSSVE"
+        elif mode == Mode.SVE:
+            cflags += " -DSVE"
+
+        return {"CFLAGS_EXTRAS": cflags}
+
+    def skip_if_needed(self, mode):
+        if (mode == Mode.SVE) and not self.isAArch64SVE():
+            self.skipTest("SVE registers must be supported.")
+
+        if (mode == Mode.SSVE) and not self.isAArch64SME():
+            self.skipTest("SSVE registers must be supported.")
+
+    def make_simd_value(self, n):
+        pad = " ".join(["0x00"] * 7)
+        return "{{0x{:02x} {} 0x{:02x} {}}}".format(n, pad, n, pad)
+
+    def sve_simd_registers_impl(self, mode):
+        self.skip_if_needed(mode)
+
+        self.build(dictionary=self.get_build_flags(mode))
+        self.line = line_number("main.c", "// Set a break point here.")
+
+        exe = self.getBuildArtifact("a.out")
+        self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
+
+        lldbutil.run_break_set_by_file_and_line(
+            self, "main.c", self.line, num_expected_locations=1
+        )
+        self.runCmd("run", RUN_SUCCEEDED)
+
+        self.expect(
+            "thread backtrace",
+            STOPPED_DUE_TO_BREAKPOINT,
+            substrs=["stop reason = breakpoint 1."],
+        )
+
+        # These are 128 bit registers, so getting them from the API as unsigned
+        # values doesn't work. Check the command output instead.
+        for i in range(32):
+            self.expect("register read v{}".format(i),
+                substrs=[self.make_simd_value(i)])
+
+        # Write a new set of values. The kernel will move the program back to
+        # non-streaming mode here.
+        for i in range(32):
+            self.runCmd("register write v{} \"{}\"".format(
+                i, self.make_simd_value(i+1)))
+
+        # Should be visible within lldb.
+        for i in range(32):
+            self.expect("register read v{}".format(i),
+                substrs=[self.make_simd_value(i+1)])
+
+        # The program should agree with lldb.
+        self.expect("continue", substrs=["exited with status = 0"])
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_simd_registers_sve(self):
+        """Test read/write of SIMD registers when in SVE mode."""
+        self.sve_simd_registers_impl(Mode.SVE)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_simd_registers_ssve(self):
+        """Test read/write of SIMD registers when in SSVE mode."""
+        self.sve_simd_registers_impl(Mode.SSVE)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_simd_registers_simd(self):
+        """Test read/write of SIMD registers when in SIMD mode."""
+        self.sve_simd_registers_impl(Mode.SIMD)
\ No newline at end of file
Index: lldb/test/API/commands/register/register/aarch64_sve_simd_registers/Makefile
===================================================================
--- lldb/test/API/commands/register/register/aarch64_sve_simd_registers/Makefile
+++ lldb/test/API/commands/register/register/aarch64_sve_simd_registers/Makefile
@@ -1,5 +1,3 @@
 C_SOURCES := main.c
 
-CFLAGS_EXTRAS := -march=armv8-a+sve
-
 include Makefile.rules
Index: lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c
===================================================================
--- lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c
+++ lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c
@@ -1,6 +1,15 @@
+#include <stdbool.h>
 #include <sys/prctl.h>
 
+#ifndef PR_SME_SET_VL
+#define PR_SME_SET_VL 63
+#endif
+
+#define SMSTART() asm volatile("msr  s0_3_c4_c7_3, xzr" /*smstart*/)
+
 void write_sve_regs() {
+  // We assume the smefa64 feature is present, which allows ffr access
+  // in streaming mode.
   asm volatile("setffr\n\t");
   asm volatile("ptrue p0.b\n\t");
   asm volatile("ptrue p1.h\n\t");
@@ -53,18 +62,85 @@
   asm volatile("cpy  z31.b, p15/z, #32\n\t");
 }
 
+// Set some different values so we can tell if lldb correctly returns to the set
+// above after the expression is finished.
+void write_sve_regs_expr() {
+  // pfalse only operates on the "b" aka byte element size.
+  asm volatile("pfalse p0.b\n\t");
+  asm volatile("wrffr p0.b\n\t");
+  asm volatile("pfalse p1.b\n\t");
+  asm volatile("pfalse p2.b\n\t");
+  asm volatile("pfalse p3.b\n\t");
+  asm volatile("ptrue p4.b\n\t");
+  asm volatile("pfalse p5.b\n\t");
+  asm volatile("pfalse p6.b\n\t");
+  asm volatile("pfalse p7.b\n\t");
+  asm volatile("pfalse p8.b\n\t");
+  asm volatile("ptrue p9.b\n\t");
+  asm volatile("pfalse p10.b\n\t");
+  asm volatile("pfalse p11.b\n\t");
+  asm volatile("pfalse p12.b\n\t");
+  asm volatile("pfalse p13.b\n\t");
+  asm volatile("ptrue p14.b\n\t");
+  asm volatile("pfalse p15.b\n\t");
+
+  asm volatile("cpy  z0.b, p0/z, #2\n\t");
+  asm volatile("cpy  z1.b, p5/z, #3\n\t");
+  asm volatile("cpy  z2.b, p10/z, #4\n\t");
+  asm volatile("cpy  z3.b, p15/z, #5\n\t");
+  asm volatile("cpy  z4.b, p0/z, #6\n\t");
+  asm volatile("cpy  z5.b, p5/z, #7\n\t");
+  asm volatile("cpy  z6.b, p10/z, #8\n\t");
+  asm volatile("cpy  z7.b, p15/z, #9\n\t");
+  asm volatile("cpy  z8.b, p0/z, #10\n\t");
+  asm volatile("cpy  z9.b, p5/z, #11\n\t");
+  asm volatile("cpy  z10.b, p10/z, #12\n\t");
+  asm volatile("cpy  z11.b, p15/z, #13\n\t");
+  asm volatile("cpy  z12.b, p0/z, #14\n\t");
+  asm volatile("cpy  z13.b, p5/z, #15\n\t");
+  asm volatile("cpy  z14.b, p10/z, #16\n\t");
+  asm volatile("cpy  z15.b, p15/z, #17\n\t");
+  asm volatile("cpy  z16.b, p0/z, #18\n\t");
+  asm volatile("cpy  z17.b, p5/z, #19\n\t");
+  asm volatile("cpy  z18.b, p10/z, #20\n\t");
+  asm volatile("cpy  z19.b, p15/z, #21\n\t");
+  asm volatile("cpy  z20.b, p0/z, #22\n\t");
+  asm volatile("cpy  z21.b, p5/z, #23\n\t");
+  asm volatile("cpy  z22.b, p10/z, #24\n\t");
+  asm volatile("cpy  z23.b, p15/z, #25\n\t");
+  asm volatile("cpy  z24.b, p0/z, #26\n\t");
+  asm volatile("cpy  z25.b, p5/z, #27\n\t");
+  asm volatile("cpy  z26.b, p10/z, #28\n\t");
+  asm volatile("cpy  z27.b, p15/z, #29\n\t");
+  asm volatile("cpy  z28.b, p0/z, #30\n\t");
+  asm volatile("cpy  z29.b, p5/z, #31\n\t");
+  asm volatile("cpy  z30.b, p10/z, #32\n\t");
+  asm volatile("cpy  z31.b, p15/z, #33\n\t");
+}
+
 // This function will be called using jitted expression call. We change vector
 // length and write SVE registers. Our program context should restore to
 // orignal vector length and register values after expression evaluation.
-int expr_eval_func() {
-  prctl(PR_SVE_SET_VL, 8 * 2);
-  write_sve_regs();
-  prctl(PR_SVE_SET_VL, 8 * 4);
-  write_sve_regs();
+int expr_eval_func(bool streaming) {
+  int SET_VL_OPT = streaming ? PR_SME_SET_VL : PR_SVE_SET_VL;
+  prctl(SET_VL_OPT, 8 * 2);
+  // Note that doing a syscall brings you back to non-streaming mode, so we
+  // don't need to SMSTOP here.
+  if (streaming)
+    SMSTART();
+  write_sve_regs_expr();
+  prctl(SET_VL_OPT, 8 * 4);
+  if (streaming)
+    SMSTART();
+  write_sve_regs_expr();
   return 1;
 }
 
 int main() {
+#ifdef START_SSVE
+  SMSTART();
+#endif
   write_sve_regs();
+
   return 0; // Set a break point here.
 }
Index: lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py
===================================================================
--- lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py
+++ lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py
@@ -2,11 +2,15 @@
 Test the AArch64 SVE registers.
 """
 
+from enum import Enum
 import lldb
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
 
+class Mode(Enum):
+    SVE = 0
+    SSVE = 1
 
 class RegisterCommandsTestCase(TestBase):
     def check_sve_register_size(self, set, name, expected):
@@ -61,20 +65,28 @@
 
         self.expect("register read " + "ffr", substrs=[p_regs_value])
 
-    @no_debug_info_test
-    @skipIf(archs=no_match(["aarch64"]))
-    @skipIf(oslist=no_match(["linux"]))
-    def test_sve_registers_configuration(self):
-        """Test AArch64 SVE registers size configuration."""
-        self.build()
+    def get_build_flags(self, mode):
+        cflags = "-march=armv8-a+sve"
+        if mode == Mode.SSVE:
+            cflags += " -DSTART_SSVE"
+        return {"CFLAGS_EXTRAS": cflags}
+
+    def skip_if_needed(self, mode):
+        if (mode == Mode.SVE) and not self.isAArch64SVE():
+            self.skipTest("SVE registers must be supported.")
+
+        if (mode == Mode.SSVE) and not self.isAArch64SME():
+            self.skipTest("SSVE registers must be supported.")
+
+    def sve_registers_configuration_impl(self, mode):
+        self.skip_if_needed(mode)
+
+        self.build(dictionary=self.get_build_flags(mode))
         self.line = line_number("main.c", "// Set a break point here.")
 
         exe = self.getBuildArtifact("a.out")
         self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
 
-        if not self.isAArch64SVE():
-            self.skipTest("SVE registers must be supported.")
-
         lldbutil.run_break_set_by_file_and_line(
             self, "main.c", self.line, num_expected_locations=1
         )
@@ -91,26 +103,17 @@
         thread = process.GetThreadAtIndex(0)
         currentFrame = thread.GetFrameAtIndex(0)
 
-        has_sve = False
-        for registerSet in currentFrame.GetRegisters():
-            if "Scalable Vector Extension Registers" in registerSet.GetName():
-                has_sve = True
-
         registerSets = process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters()
-
-        sve_registers = registerSets.GetValueAtIndex(2)
-
-        vg_reg = sve_registers.GetChildMemberWithName("vg")
+        sve_registers = registerSets.GetFirstValueByName("Scalable Vector Extension Registers")
+        self.assertTrue(sve_registers)
 
         vg_reg_value = sve_registers.GetChildMemberWithName("vg").GetValueAsUnsigned()
 
         z_reg_size = vg_reg_value * 8
-
-        p_reg_size = z_reg_size / 8
-
         for i in range(32):
             self.check_sve_register_size(sve_registers, "z%i" % (i), z_reg_size)
 
+        p_reg_size = z_reg_size / 8
         for i in range(16):
             self.check_sve_register_size(sve_registers, "p%i" % (i), p_reg_size)
 
@@ -119,17 +122,26 @@
     @no_debug_info_test
     @skipIf(archs=no_match(["aarch64"]))
     @skipIf(oslist=no_match(["linux"]))
-    def test_sve_registers_read_write(self):
-        """Test AArch64 SVE registers read and write."""
-        self.build()
-        self.line = line_number("main.c", "// Set a break point here.")
+    def test_sve_registers_configuration(self):
+        """Test AArch64 SVE registers size configuration."""
+        self.sve_registers_configuration_impl(Mode.SVE)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_ssve_registers_configuration(self):
+        """Test AArch64 SSVE registers size configuration."""
+        self.sve_registers_configuration_impl(Mode.SSVE)
+
+    def sve_registers_read_write_impl(self, start_mode, eval_mode):
+        self.skip_if_needed(start_mode)
+        self.skip_if_needed(eval_mode)
+        self.build(dictionary=self.get_build_flags(start_mode))
 
         exe = self.getBuildArtifact("a.out")
         self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
 
-        if not self.isAArch64SVE():
-            self.skipTest("SVE registers must be supported.")
-
+        self.line = line_number("main.c", "// Set a break point here.")
         lldbutil.run_break_set_by_file_and_line(
             self, "main.c", self.line, num_expected_locations=1
         )
@@ -143,34 +155,55 @@
 
         target = self.dbg.GetSelectedTarget()
         process = target.GetProcess()
-        thread = process.GetThreadAtIndex(0)
-        currentFrame = thread.GetFrameAtIndex(0)
-
-        has_sve = False
-        for registerSet in currentFrame.GetRegisters():
-            if "Scalable Vector Extension Registers" in registerSet.GetName():
-                has_sve = True
 
         registerSets = process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters()
-
-        sve_registers = registerSets.GetValueAtIndex(2)
-
-        vg_reg = sve_registers.GetChildMemberWithName("vg")
+        sve_registers = registerSets.GetFirstValueByName("Scalable Vector Extension Registers")
+        self.assertTrue(sve_registers)
 
         vg_reg_value = sve_registers.GetChildMemberWithName("vg").GetValueAsUnsigned()
-
         z_reg_size = vg_reg_value * 8
-
         self.check_sve_regs_read(z_reg_size)
 
         # Evaluate simple expression and print function expr_eval_func address.
         self.expect("expression expr_eval_func", substrs=["= 0x"])
 
         # Evaluate expression call function expr_eval_func.
-        self.expect_expr("expr_eval_func()", result_type="int", result_value="1")
+        self.expect_expr("expr_eval_func({})".format(
+            "true" if (eval_mode == Mode.SSVE) else "false"), result_type="int",
+            result_value="1")
 
         # We called a jitted function above which must not have changed SVE
         # vector length or register values.
         self.check_sve_regs_read(z_reg_size)
 
         self.check_sve_regs_read_after_write(z_reg_size)
+
+    # The following tests all setup some register values then evaluate an
+    # expression. After the expression, the mode and register values should be
+    # the same as before. Finally they read/write some values in the registers.
+    # The only difference is the mode we start the program in, and the mode
+    # the expression function uses.
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_registers_expr_read_write_sve_sve(self):
+        self.sve_registers_read_write_impl(Mode.SVE, Mode.SVE)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_registers_expr_read_write_ssve_ssve(self):
+        self.sve_registers_read_write_impl(Mode.SSVE, Mode.SSVE)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_registers_expr_read_write_sve_ssve(self):
+        self.sve_registers_read_write_impl(Mode.SVE, Mode.SSVE)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_registers_expr_read_write_ssve_sve(self):
+        self.sve_registers_read_write_impl(Mode.SSVE, Mode.SVE)
\ No newline at end of file
Index: lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile
===================================================================
--- lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile
+++ lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile
@@ -1,5 +1,3 @@
 C_SOURCES := main.c
 
-CFLAGS_EXTRAS := -march=armv8-a+sve
-
 include Makefile.rules
Index: lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/main.c
===================================================================
--- lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/main.c
+++ lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/main.c
@@ -1,6 +1,12 @@
 #include <pthread.h>
 #include <sys/prctl.h>
 
+#ifndef PR_SME_SET_VL
+#define PR_SME_SET_VL 63
+#endif
+
+#define SMSTART() asm volatile("msr  s0_3_c4_c7_3, xzr" /*smstart*/)
+
 static inline void write_sve_registers() {
   asm volatile("setffr\n\t");
   asm volatile("ptrue p0.b\n\t");
@@ -54,26 +60,41 @@
   asm volatile("cpy  z31.b, p15/z, #32\n\t");
 }
 
+int SET_VL_OPT = PR_SVE_SET_VL;
+
 void *threadX_func(void *x_arg) {
-  prctl(PR_SVE_SET_VL, 8 * 4);
+  prctl(SET_VL_OPT, 8 * 4);
+#ifdef USE_SSVE
+  SMSTART();
+#endif
   write_sve_registers();
   write_sve_registers(); // Thread X breakpoint 1
   return NULL;           // Thread X breakpoint 2
 }
 
 void *threadY_func(void *y_arg) {
-  prctl(PR_SVE_SET_VL, 8 * 2);
+  prctl(SET_VL_OPT, 8 * 2);
+#ifdef USE_SSVE
+  SMSTART();
+#endif
   write_sve_registers();
   write_sve_registers(); // Thread Y breakpoint 1
   return NULL;           // Thread Y breakpoint 2
 }
 
 int main() {
+#ifdef USE_SSVE
+  SET_VL_OPT = PR_SME_SET_VL;
+#endif
+
   /* this variable is our reference to the second thread */
   pthread_t x_thread, y_thread;
 
   /* Set vector length to 8 and write SVE registers values */
-  prctl(PR_SVE_SET_VL, 8 * 8);
+  prctl(SET_VL_OPT, 8 * 8);
+#ifdef USE_SSVE
+  SMSTART();
+#endif
   write_sve_registers();
 
   /* create a second thread which executes with argument x */
Index: lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py
===================================================================
--- lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py
+++ lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py
@@ -1,5 +1,6 @@
 """
-Test the AArch64 SVE registers dynamic resize with multiple threads.
+Test the AArch64 SVE and Streaming SVE (SSVE) registers dynamic resize with
+multiple threads.
 
 This test assumes a minimum supported vector length (VL) of 256 bits
 and will test 512 bits if possible. We refer to "vg" which is the
@@ -7,11 +8,15 @@
 the same as a vg of 4.
 """
 
+from enum import Enum
 import lldb
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
 
+class Mode(Enum):
+    SVE = 0
+    SSVE = 1
 
 class RegisterCommandsTestCase(TestBase):
     def get_supported_vg(self):
@@ -45,6 +50,9 @@
             if not self.res.GetError():
                 supported_vg.append(vg)
 
+        self.runCmd("breakpoint delete 1")
+        self.runCmd("continue")
+
         return supported_vg
 
     def check_sve_registers(self, vg_test_value):
@@ -88,24 +96,24 @@
 
         self.expect("register read ffr", substrs=[p_regs_value])
 
-    @no_debug_info_test
-    @skipIf(archs=no_match(["aarch64"]))
-    @skipIf(oslist=no_match(["linux"]))
-    def test_sve_registers_dynamic_config(self):
-        """Test AArch64 SVE registers multi-threaded dynamic resize."""
-
-        if not self.isAArch64SVE():
+    def run_sve_test(self, mode):
+        if (mode == Mode.SVE) and not self.isAArch64SVE():
             self.skipTest("SVE registers must be supported.")
 
+        if (mode == Mode.SSVE) and not self.isAArch64SME():
+            self.skipTest("Streaming SVE registers must be supported.")
+
+        cflags = "-march=armv8-a+sve -lpthread"
+        if mode == Mode.SSVE:
+            cflags += " -DUSE_SSVE"
+        self.build(dictionary={"CFLAGS_EXTRAS": cflags})
+
         self.build()
         supported_vg = self.get_supported_vg()
 
         if not (2 in supported_vg and 4 in supported_vg):
             self.skipTest("Not all required SVE vector lengths are supported.")
 
-        exe = self.getBuildArtifact("a.out")
-        self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
-
         main_thread_stop_line = line_number("main.c", "// Break in main thread")
         lldbutil.run_break_set_by_file_and_line(self, "main.c", main_thread_stop_line)
 
@@ -176,3 +184,17 @@
             elif stopped_at_line_number == thY_break_line2:
                 self.runCmd("thread select %d" % (idx + 1))
                 self.check_sve_registers(4)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_sve_registers_dynamic_config(self):
+        """Test AArch64 SVE registers multi-threaded dynamic resize."""
+        self.run_sve_test(Mode.SVE)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_ssve_registers_dynamic_config(self):
+        """Test AArch64 SSVE registers multi-threaded dynamic resize."""
+        self.run_sve_test(Mode.SSVE)
Index: lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
===================================================================
--- lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
+++ lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
@@ -15,7 +15,7 @@
 #include "lldb/lldb-private.h"
 #include <map>
 
-enum class SVEState { Unknown, Disabled, FPSIMD, Full };
+enum class SVEState : uint8_t { Unknown, Disabled, FPSIMD, Full, Streaming };
 
 class RegisterInfoPOSIX_arm64
     : public lldb_private::RegisterInfoAndSetInterface {
@@ -26,9 +26,10 @@
   enum {
     eRegsetMaskDefault = 0,
     eRegsetMaskSVE = 1,
-    eRegsetMaskPAuth = 2,
-    eRegsetMaskMTE = 4,
-    eRegsetMaskTLS = 8,
+    eRegsetMaskSSVE = 2,
+    eRegsetMaskPAuth = 4,
+    eRegsetMaskMTE = 8,
+    eRegsetMaskTLS = 16,
     eRegsetMaskDynamic = ~1,
   };
 
@@ -115,6 +116,7 @@
   }
 
   bool IsSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSVE); }
+  bool IsSSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSSVE); }
   bool IsPAuthEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); }
   bool IsMTEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); }
 
Index: lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
===================================================================
--- lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
+++ lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
@@ -212,7 +212,7 @@
     // dynamic register set like MTE, Pointer Authentication regset then we need
     // to create dynamic register infos and regset array. Push back all optional
     // register infos and regset and calculate register offsets accordingly.
-    if (m_opt_regsets.AllSet(eRegsetMaskSVE)) {
+    if (m_opt_regsets.AnySet(eRegsetMaskSVE | eRegsetMaskSSVE)) {
       m_register_info_p = g_register_infos_arm64_sve_le;
       m_register_info_count = sve_ffr + 1;
       m_per_regset_regnum_range[m_register_set_count++] =
Index: lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h
===================================================================
--- lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h
+++ lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h
@@ -81,11 +81,9 @@
 private:
   bool m_gpr_is_valid;
   bool m_fpu_is_valid;
-  bool m_sve_buffer_is_valid;
   bool m_mte_ctrl_is_valid;
   bool m_tls_tpidr_is_valid;
 
-  bool m_sve_header_is_valid;
   bool m_pac_mask_is_valid;
 
   struct user_pt_regs m_gpr_arm64; // 64-bit general purpose registers.
@@ -94,8 +92,29 @@
       m_fpr; // floating-point registers including extended register sets.
 
   SVEState m_sve_state;
-  struct sve::user_sve_header m_sve_header;
-  std::vector<uint8_t> m_sve_ptrace_payload;
+
+  struct SVEStateData {
+    SVEStateData(unsigned regset)
+        : m_header_is_valid(false), m_buffer_is_valid(false), m_regset(regset) {
+      ::memset(&m_header, 0, sizeof(m_header));
+    }
+
+    void Invalidate() {
+      m_header_is_valid = false;
+      m_buffer_is_valid = false;
+    }
+
+    bool m_header_is_valid;
+    struct sve::user_sve_header m_header;
+    bool m_buffer_is_valid;
+    // For storing the full ptrace data.
+    std::vector<uint8_t> m_buffer;
+    // For the ptrace request.
+    unsigned m_regset;
+  };
+
+  SVEStateData m_sve_state_data;
+  SVEStateData m_ssve_state_data;
 
   bool m_refresh_hwdebug_info;
 
@@ -114,6 +133,12 @@
 
   bool IsFPR(unsigned reg) const;
 
+  SVEStateData &CurrentSVEStateData() {
+    if (m_sve_state == SVEState::Streaming)
+      return m_ssve_state_data;
+    return m_sve_state_data;
+  }
+
   Status ReadAllSVE();
 
   Status WriteAllSVE();
@@ -137,11 +162,9 @@
   bool IsMTE(unsigned reg) const;
   bool IsTLS(unsigned reg) const;
 
-  uint64_t GetSVERegVG() { return m_sve_header.vl / 8; }
+  uint64_t GetSVERegVG() { return CurrentSVEStateData().m_header.vl / 8; }
 
-  void SetSVERegVG(uint64_t vg) { m_sve_header.vl = vg * 8; }
-
-  void *GetSVEHeader() { return &m_sve_header; }
+  void SetSVERegVG(uint64_t vg) { CurrentSVEStateData().m_header.vl = vg * 8; }
 
   void *GetPACMask() { return &m_pac_mask; }
 
@@ -149,13 +172,15 @@
 
   void *GetTLSTPIDR() { return &m_tls_tpidr_reg; }
 
-  void *GetSVEBuffer() { return m_sve_ptrace_payload.data(); };
+  void *GetSVEBuffer() { return CurrentSVEStateData().m_buffer.data(); }
 
-  size_t GetSVEHeaderSize() { return sizeof(m_sve_header); }
+  size_t GetSVEBufferSize() { return CurrentSVEStateData().m_buffer.size(); }
 
-  size_t GetPACMaskSize() { return sizeof(m_pac_mask); }
+  void *GetSVEHeader() { return &(CurrentSVEStateData().m_header); }
 
-  size_t GetSVEBufferSize() { return m_sve_ptrace_payload.size(); }
+  size_t GetSVEHeaderSize() { return sizeof(CurrentSVEStateData().m_header); }
+
+  size_t GetPACMaskSize() { return sizeof(m_pac_mask); }
 
   size_t GetMTEControlSize() { return sizeof(m_mte_ctrl_reg); }
 
Index: lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp
===================================================================
--- lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp
+++ lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp
@@ -36,6 +36,11 @@
 #define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension */
 #endif
 
+#ifndef NT_ARM_SSVE
+#define NT_ARM_SSVE                                                            \
+  0x40b /* ARM Scalable Matrix Extension, Streaming SVE mode */
+#endif
+
 #ifndef NT_ARM_PAC_MASK
 #define NT_ARM_PAC_MASK 0x406 /* Pointer authentication code masks */
 #endif
@@ -71,9 +76,20 @@
     if (NativeProcessLinux::PtraceWrapper(PTRACE_GETREGSET,
                                           native_thread.GetID(), &regset,
                                           &ioVec, sizeof(sve_header))
-            .Success())
+            .Success()) {
       opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskSVE);
 
+      // We may also have the Scalable Matrix Extension (SME) which adds a
+      // streaming SVE mode.
+      ioVec.iov_len = sizeof(sve_header);
+      regset = NT_ARM_SSVE;
+      if (NativeProcessLinux::PtraceWrapper(PTRACE_GETREGSET,
+                                            native_thread.GetID(), &regset,
+                                            &ioVec, sizeof(sve_header))
+              .Success())
+        opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskSSVE);
+    }
+
     NativeProcessLinux &process = native_thread.GetProcess();
 
     std::optional<uint64_t> auxv_at_hwcap =
@@ -109,12 +125,12 @@
     std::unique_ptr<RegisterInfoPOSIX_arm64> register_info_up)
     : NativeRegisterContextRegisterInfo(native_thread,
                                         register_info_up.release()),
-      NativeRegisterContextLinux(native_thread) {
+      NativeRegisterContextLinux(native_thread), m_sve_state_data(NT_ARM_SVE),
+      m_ssve_state_data(NT_ARM_SSVE) {
   ::memset(&m_fpr, 0, sizeof(m_fpr));
   ::memset(&m_gpr_arm64, 0, sizeof(m_gpr_arm64));
   ::memset(&m_hwp_regs, 0, sizeof(m_hwp_regs));
   ::memset(&m_hbp_regs, 0, sizeof(m_hbp_regs));
-  ::memset(&m_sve_header, 0, sizeof(m_sve_header));
   ::memset(&m_pac_mask, 0, sizeof(m_pac_mask));
 
   m_mte_ctrl_reg = 0;
@@ -128,13 +144,11 @@
 
   m_gpr_is_valid = false;
   m_fpu_is_valid = false;
-  m_sve_buffer_is_valid = false;
-  m_sve_header_is_valid = false;
   m_pac_mask_is_valid = false;
   m_mte_ctrl_is_valid = false;
   m_tls_tpidr_is_valid = false;
 
-  if (GetRegisterInfo().IsSVEEnabled())
+  if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled())
     m_sve_state = SVEState::Unknown;
   else
     m_sve_state = SVEState::Disabled;
@@ -203,26 +217,36 @@
       assert(offset < GetFPRSize());
       src = (uint8_t *)GetFPRBuffer() + offset;
     } else {
-      // SVE enabled, we will read and cache SVE ptrace data
+      // SVE enabled, we will read and cache SVE ptrace data.
+      // In SIMD or Full mode, the data comes from the SVE regset. In streaming
+      // mode, it also comes from that set, so we have to switch temporarily.
+      SVEState previous_sve_state = m_sve_state;
+      if (m_sve_state == SVEState::Streaming)
+        m_sve_state = SVEState::FPSIMD;
+
       error = ReadAllSVE();
-      if (error.Fail())
+      if (error.Fail()) {
+        m_sve_state = previous_sve_state;
         return error;
+      }
 
       // FPSR and FPCR will be located right after Z registers in
-      // SVEState::FPSIMD while in SVEState::Full they will be located at the
-      // end of register data after an alignment correction based on currently
-      // selected vector length.
+      // SVEState::FPSIMD while in SVEState::Full they
+      // will be located at the end of register data after an alignment
+      // correction based on currently selected vector length.
       uint32_t sve_reg_num = LLDB_INVALID_REGNUM;
       if (reg == GetRegisterInfo().GetRegNumFPSR()) {
         sve_reg_num = reg;
         if (m_sve_state == SVEState::Full)
-          offset = sve::PTraceFPSROffset(sve::vq_from_vl(m_sve_header.vl));
+          offset = sve::PTraceFPSROffset(
+              sve::vq_from_vl(CurrentSVEStateData().m_header.vl));
         else if (m_sve_state == SVEState::FPSIMD)
           offset = sve::ptrace_fpsimd_offset + (32 * 16);
       } else if (reg == GetRegisterInfo().GetRegNumFPCR()) {
         sve_reg_num = reg;
         if (m_sve_state == SVEState::Full)
-          offset = sve::PTraceFPCROffset(sve::vq_from_vl(m_sve_header.vl));
+          offset = sve::PTraceFPCROffset(
+              sve::vq_from_vl(CurrentSVEStateData().m_header.vl));
         else if (m_sve_state == SVEState::FPSIMD)
           offset = sve::ptrace_fpsimd_offset + (32 * 16) + 4;
       } else {
@@ -235,6 +259,10 @@
 
       assert(offset < GetSVEBufferSize());
       src = (uint8_t *)GetSVEBuffer() + offset;
+
+      // We have separate copies of streaming and non-streaming state, so src
+      // may point to an inactive mode but the pointer is still valid.
+      m_sve_state = previous_sve_state;
     }
   } else if (IsTLS(reg)) {
     error = ReadTLSTPIDR();
@@ -344,7 +372,17 @@
 
       return WriteFPR();
     } else {
-      // SVE enabled, we will read and cache SVE ptrace data
+      // SVE enabled, we will read and cache SVE ptrace data. Even when in
+      // streaming mode, we need to write to the non-streaming regset. Doing so
+      // also exits streaming mode and invalidates its state. Therefore we don't
+      // have to flush it before doing this.
+      SVEState previous_sve_state = m_sve_state;
+      if (m_sve_state == SVEState::Streaming)
+        m_sve_state = SVEState::FPSIMD;
+
       error = ReadAllSVE();
-      if (error.Fail())
+      if (error.Fail()) {
+        // Nothing was written, so the thread is still in its previous mode.
+        m_sve_state = previous_sve_state;
         return error;
+      }
@@ -357,13 +392,15 @@
       if (reg == GetRegisterInfo().GetRegNumFPSR()) {
         sve_reg_num = reg;
         if (m_sve_state == SVEState::Full)
-          offset = sve::PTraceFPSROffset(sve::vq_from_vl(m_sve_header.vl));
+          offset = sve::PTraceFPSROffset(
+              sve::vq_from_vl(CurrentSVEStateData().m_header.vl));
         else if (m_sve_state == SVEState::FPSIMD)
           offset = sve::ptrace_fpsimd_offset + (32 * 16);
       } else if (reg == GetRegisterInfo().GetRegNumFPCR()) {
         sve_reg_num = reg;
         if (m_sve_state == SVEState::Full)
-          offset = sve::PTraceFPCROffset(sve::vq_from_vl(m_sve_header.vl));
+          offset = sve::PTraceFPCROffset(
+              sve::vq_from_vl(CurrentSVEStateData().m_header.vl));
         else if (m_sve_state == SVEState::FPSIMD)
           offset = sve::ptrace_fpsimd_offset + (32 * 16) + 4;
       } else {
@@ -377,7 +414,17 @@
       assert(offset < GetSVEBufferSize());
       dst = (uint8_t *)GetSVEBuffer() + offset;
       ::memcpy(dst, reg_value.GetBytes(), reg_info->byte_size);
-      return WriteAllSVE();
+      Status write_result = WriteAllSVE();
+
+      if (previous_sve_state == SVEState::Streaming) {
+        // We have exited streaming mode, our vector length may have changed.
+        m_sve_state = SVEState::Unknown;
+        m_sve_state_data.Invalidate();
+        m_ssve_state_data.Invalidate();
+        ConfigureRegisterContext();
+      }
+
+      return write_result;
     }
   } else if (IsSVE(reg)) {
     if (m_sve_state == SVEState::Disabled || m_sve_state == SVEState::Unknown)
@@ -392,7 +439,8 @@
         uint64_t vg_value = reg_value.GetAsUInt64();
 
         if (sve::vl_valid(vg_value * 8)) {
-          if (m_sve_header_is_valid && vg_value == GetSVERegVG())
+          if (CurrentSVEStateData().m_header_is_valid &&
+              vg_value == GetSVERegVG())
             return error;
 
           SetSVERegVG(vg_value);
@@ -401,7 +449,8 @@
           if (error.Success())
             ConfigureRegisterContext();
 
-          if (m_sve_header_is_valid && vg_value == GetSVERegVG())
+          if (CurrentSVEStateData().m_header_is_valid &&
+              vg_value == GetSVERegVG())
             return error;
         }
 
@@ -480,8 +529,9 @@
 Status NativeRegisterContextLinux_arm64::ReadAllRegisterValues(
     lldb::WritableDataBufferSP &data_sp) {
   // AArch64 register data must contain GPRs, either FPR or SVE registers
-  // and optional MTE register. Pointer Authentication (PAC) registers are
-  // read-only and will be skiped.
+  // (either non-streaming, SVE, or streaming, SSVE) and the optional MTE
+  // register. Pointer Authentication (PAC) registers are read-only and will be
+  // skipped.
 
   // In order to create register data checkpoint we first read all register
   // values if not done already and calculate total size of register set data.
@@ -495,8 +545,10 @@
     return error;
 
   // If SVE is enabled we need not copy FPR separately.
-  if (GetRegisterInfo().IsSVEEnabled()) {
+  if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled()) {
     reg_data_byte_size += GetSVEBufferSize();
+    // Also store the current SVE mode (dst advances by sizeof(m_sve_state)).
+    reg_data_byte_size += sizeof(m_sve_state);
     error = ReadAllSVE();
   } else {
     reg_data_byte_size += GetFPRSize();
@@ -524,7 +576,9 @@
   ::memcpy(dst, GetGPRBuffer(), GetGPRBufferSize());
   dst += GetGPRBufferSize();
 
-  if (GetRegisterInfo().IsSVEEnabled()) {
+  if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled()) {
+    *dst = static_cast<uint8_t>(m_sve_state);
+    dst += sizeof(m_sve_state);
     ::memcpy(dst, GetSVEBuffer(), GetSVEBufferSize());
     dst += GetSVEBufferSize();
   } else {
@@ -594,16 +648,21 @@
       (data_sp->GetByteSize() > (reg_data_min_size + GetSVEHeaderSize()));
 
   if (contains_sve_reg_data) {
+    // Restore to the correct mode, streaming or not.
+    m_sve_state = static_cast<SVEState>(*src);
+    src += sizeof(m_sve_state);
+
     // We have SVE register data first write SVE header.
     ::memcpy(GetSVEHeader(), src, GetSVEHeaderSize());
-    if (!sve::vl_valid(m_sve_header.vl)) {
-      m_sve_header_is_valid = false;
+    SVEStateData &sve_state = CurrentSVEStateData();
+    if (!sve::vl_valid(sve_state.m_header.vl)) {
+      sve_state.m_header_is_valid = false;
       error.SetErrorStringWithFormat("NativeRegisterContextLinux_arm64::%s "
                                      "Invalid SVE header in data_sp",
                                      __FUNCTION__);
       return error;
     }
-    m_sve_header_is_valid = true;
+    sve_state.m_header_is_valid = true;
     error = WriteSVEHeader();
     if (error.Fail())
       return error;
@@ -622,7 +681,7 @@
     }
 
     ::memcpy(GetSVEBuffer(), src, GetSVEBufferSize());
-    m_sve_buffer_is_valid = true;
+    CurrentSVEStateData().m_buffer_is_valid = true;
     error = WriteAllSVE();
     src += GetSVEBufferSize();
   } else {
@@ -814,8 +873,8 @@
 void NativeRegisterContextLinux_arm64::InvalidateAllRegisters() {
   m_gpr_is_valid = false;
   m_fpu_is_valid = false;
-  m_sve_buffer_is_valid = false;
-  m_sve_header_is_valid = false;
+  m_sve_state_data.Invalidate();
+  m_ssve_state_data.Invalidate();
   m_pac_mask_is_valid = false;
   m_mte_ctrl_is_valid = false;
   m_tls_tpidr_is_valid = false;
@@ -826,18 +885,19 @@
 
 Status NativeRegisterContextLinux_arm64::ReadSVEHeader() {
   Status error;
+  SVEStateData &state = CurrentSVEStateData();
 
-  if (m_sve_header_is_valid)
+  if (state.m_header_is_valid)
     return error;
 
   struct iovec ioVec;
-  ioVec.iov_base = GetSVEHeader();
-  ioVec.iov_len = GetSVEHeaderSize();
+  ioVec.iov_base = &state.m_header;
+  ioVec.iov_len = sizeof(state.m_header);
 
-  error = ReadRegisterSet(&ioVec, GetSVEHeaderSize(), NT_ARM_SVE);
+  error = ReadRegisterSet(&ioVec, sizeof(state.m_header), state.m_regset);
 
   if (error.Success())
-    m_sve_header_is_valid = true;
+    state.m_header_is_valid = true;
 
   return error;
 }
@@ -862,36 +922,39 @@
 
 Status NativeRegisterContextLinux_arm64::WriteSVEHeader() {
   Status error;
+  SVEStateData &state = CurrentSVEStateData();
 
   error = ReadSVEHeader();
   if (error.Fail())
     return error;
 
   struct iovec ioVec;
-  ioVec.iov_base = GetSVEHeader();
-  ioVec.iov_len = GetSVEHeaderSize();
+  ioVec.iov_base = &state.m_header;
+  ioVec.iov_len = sizeof(state.m_header);
 
-  m_sve_buffer_is_valid = false;
-  m_sve_header_is_valid = false;
+  // All SIMD/SVE/SSVE state must be re-read after doing this write.
+  m_sve_state_data.Invalidate();
+  m_ssve_state_data.Invalidate();
   m_fpu_is_valid = false;
 
-  return WriteRegisterSet(&ioVec, GetSVEHeaderSize(), NT_ARM_SVE);
+  return WriteRegisterSet(&ioVec, sizeof(state.m_header), state.m_regset);
 }
 
 Status NativeRegisterContextLinux_arm64::ReadAllSVE() {
   Status error;
+  SVEStateData &state = CurrentSVEStateData();
 
-  if (m_sve_buffer_is_valid)
+  if (state.m_buffer_is_valid)
     return error;
 
   struct iovec ioVec;
-  ioVec.iov_base = GetSVEBuffer();
-  ioVec.iov_len = GetSVEBufferSize();
+  ioVec.iov_base = state.m_buffer.data();
+  ioVec.iov_len = state.m_buffer.size();
 
-  error = ReadRegisterSet(&ioVec, GetSVEBufferSize(), NT_ARM_SVE);
+  error = ReadRegisterSet(&ioVec, state.m_buffer.size(), state.m_regset);
 
   if (error.Success())
-    m_sve_buffer_is_valid = true;
+    state.m_buffer_is_valid = true;
 
   return error;
 }
@@ -903,16 +966,18 @@
   if (error.Fail())
     return error;
 
-  struct iovec ioVec;
+  SVEStateData &state = CurrentSVEStateData();
 
-  ioVec.iov_base = GetSVEBuffer();
-  ioVec.iov_len = GetSVEBufferSize();
+  struct iovec ioVec;
+  ioVec.iov_base = state.m_buffer.data();
+  ioVec.iov_len = state.m_buffer.size();
 
-  m_sve_buffer_is_valid = false;
-  m_sve_header_is_valid = false;
+  // All SIMD/SVE/SSVE state must be re-read after doing this write.
+  m_sve_state_data.Invalidate();
+  m_ssve_state_data.Invalidate();
   m_fpu_is_valid = false;
 
-  return WriteRegisterSet(&ioVec, GetSVEBufferSize(), NT_ARM_SVE);
+  return WriteRegisterSet(&ioVec, state.m_buffer.size(), state.m_regset);
 }
 
 Status NativeRegisterContextLinux_arm64::ReadMTEControl() {
@@ -985,29 +1050,59 @@
 
 void NativeRegisterContextLinux_arm64::ConfigureRegisterContext() {
   // ConfigureRegisterContext gets called from InvalidateAllRegisters
-  // on every stop and configures SVE vector length.
+  // on every stop and configures SVE vector length and whether we are in
+  // streaming SVE mode.
   // If m_sve_state is set to SVEState::Disabled on first stop, code below will
   // be deemed non operational for the lifetime of current process.
-  if (!m_sve_header_is_valid && m_sve_state != SVEState::Disabled) {
+  if (!m_sve_state_data.m_header_is_valid &&
+      !m_ssve_state_data.m_header_is_valid &&
+      m_sve_state != SVEState::Disabled) {
+    // If we have SVE we may also have the SVE streaming mode that SME added.
+    // We can read the header of either mode, but only the active mode will
+    // have valid register data.
+
+    // Remember the current mode so it can be restored if both reads fail.
+    const SVEState previous_sve_state = m_sve_state;
+
+    // Check whether SME is present and the streaming SVE mode is active.
+    // ReadSVEHeader reads the header of the mode selected by m_sve_state.
+    m_sve_state_data.Invalidate();
+    m_ssve_state_data.Invalidate();
+    m_sve_state = SVEState::Streaming;
     Status error = ReadSVEHeader();
-    if (error.Success()) {
-      // If SVE is enabled thread can switch between SVEState::FPSIMD and
-      // SVEState::Full on every stop.
-      if ((m_sve_header.flags & sve::ptrace_regs_mask) ==
-          sve::ptrace_regs_fpsimd)
-        m_sve_state = SVEState::FPSIMD;
-      else if ((m_sve_header.flags & sve::ptrace_regs_mask) ==
-               sve::ptrace_regs_sve)
-        m_sve_state = SVEState::Full;
+    // Streaming mode is active if the header has the SVE active flag set.
+    if (error.Fail() || ((m_ssve_state_data.m_header.flags &
+                          sve::ptrace_regs_mask) != sve::ptrace_regs_sve)) {
+      // If we're not streaming, non-streaming might be active.
+      m_sve_state_data.Invalidate();
+      m_ssve_state_data.Invalidate();
+      m_sve_state = SVEState::Full;
+      error = ReadSVEHeader();
+      if (error.Fail()) {
+        // Neither header could be read. Do not guess a mode from stale data.
+        m_sve_state = previous_sve_state;
+      } else if ((m_sve_state_data.m_header.flags & sve::ptrace_regs_mask) ==
+                 sve::ptrace_regs_fpsimd) {
+        // If SVE is enabled thread can switch between SVEState::FPSIMD and
+        // SVEState::Full on every stop.
+        m_sve_state = SVEState::FPSIMD;
+      }
+    }
 
+    if (error.Success()) {
+      SVEStateData &sve_state = CurrentSVEStateData();
       // On every stop we configure SVE vector length by calling
       // ConfigureVectorLength regardless of current SVEState of this thread.
       uint32_t vq = RegisterInfoPOSIX_arm64::eVectorQuadwordAArch64SVE;
-      if (sve::vl_valid(m_sve_header.vl))
-        vq = sve::vq_from_vl(m_sve_header.vl);
+      if (sve::vl_valid(sve_state.m_header.vl))
+        vq = sve::vq_from_vl(sve_state.m_header.vl);
 
       GetRegisterInfo().ConfigureVectorLength(vq);
-      m_sve_ptrace_payload.resize(sve::PTraceSize(vq, sve::ptrace_regs_sve));
+      // FPU registers are accessed through the non-streaming regset even while
+      // streaming, so both buffers must be large enough to hold a payload.
+      const size_t payload_size = sve::PTraceSize(vq, sve::ptrace_regs_sve);
+      m_sve_state_data.m_buffer.resize(payload_size);
+      m_ssve_state_data.m_buffer.resize(payload_size);
     }
   }
 }
@@ -1025,7 +1118,9 @@
     const uint32_t reg = reg_info->kinds[lldb::eRegisterKindLLDB];
     sve_reg_offset = sve::ptrace_fpsimd_offset +
                      (reg - GetRegisterInfo().GetRegNumSVEZ0()) * 16;
-  } else if (m_sve_state == SVEState::Full) {
+    // Between non-streaming and streaming mode, the layout is identical.
+  } else if (m_sve_state == SVEState::Full ||
+             m_sve_state == SVEState::Streaming) {
     uint32_t sve_z0_offset = GetGPRSize() + 16;
     sve_reg_offset =
         sve::SigRegsOffset() + reg_info->byte_offset - sve_z0_offset;
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to