DavidSpickett updated this revision to Diff 541520. DavidSpickett added a comment.
Turns out I was misinterpreting this sentence from the kernel docs: "Note that when SME is present and streaming SVE mode is in use the FPSIMD subset of registers will be read via NT_ARM_SVE and NT_ARM_SVE writes will exit streaming mode in the target." (https://kernel.org/doc/html/v6.2/arm64/sve.html)

I read this as "should be read", not "will be read". The intent of the statement is to make you aware that the register sets are connected, in that one can affect the other. However, our strategy of using the bottom part of the Z registers to read the V registers is still valid as long as we do not want to switch modes, which we never do.

So I've done what Omair suggested and reverted to a single set of state for the non-streaming and streaming modes, with m_sve_state to tell the difference.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154926/new/

https://reviews.llvm.org/D154926

Files:
  lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp
  lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h
  lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
  lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
  lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py
  lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/main.c
  lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile
  lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py
  lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c
  lldb/test/API/commands/register/register/aarch64_sve_simd_registers/Makefile
  lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py
  lldb/test/API/commands/register/register/aarch64_sve_simd_registers/main.c
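For illustration of the strategy described in the comment above (this is an editor's sketch, not code from this patch; the struct and helper names are made up): keep one cached copy of the SVE ptrace payload, tag it with the mode it came from, and read v<n> as the low 128 bits of z<n> regardless of whether the payload was read via NT_ARM_SVE or NT_ARM_SSVE. The kernel stores z0..z31 contiguously in the payload, each entry being the current vector length (vl) bytes long, and the layout is the same for both regsets.

    #include <cstdint>
    #include <cstring>

    // Matches the enum this patch adds to RegisterInfoPOSIX_arm64.h.
    enum class SVEState : uint8_t { Unknown, Disabled, FPSIMD, Full, Streaming };

    // Illustrative cache: z_regs points at the start of the z0..z31 block
    // inside the NT_ARM_SVE or NT_ARM_SSVE payload.
    struct CachedSVEData {
      SVEState state = SVEState::Unknown; // which regset the payload came from
      uint16_t vl = 0;                    // current vector length in bytes
      const uint8_t *z_regs = nullptr;
    };

    // v<n> is simply the first 16 bytes of z<n>, in streaming and
    // non-streaming mode alike, so no mode switch is needed to read it.
    inline void ReadVRegister(const CachedSVEData &data, unsigned n,
                              uint8_t out[16]) {
      std::memcpy(out, data.z_regs + n * data.vl, 16);
    }

This is why a single set of state plus m_sve_state is enough: only the regset chosen for the ptrace read differs between the modes.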
Index: lldb/test/API/commands/register/register/aarch64_sve_simd_registers/main.c =================================================================== --- /dev/null +++ lldb/test/API/commands/register/register/aarch64_sve_simd_registers/main.c @@ -0,0 +1,108 @@ +#include <stdint.h> +#include <sys/prctl.h> + +void write_simd_regs() { +#define WRITE_SIMD(NUM) \ + asm volatile("MOV v" #NUM ".d[0], %0\n\t" \ + "MOV v" #NUM ".d[1], %0\n\t" ::"r"(NUM)) + + WRITE_SIMD(0); + WRITE_SIMD(1); + WRITE_SIMD(2); + WRITE_SIMD(3); + WRITE_SIMD(4); + WRITE_SIMD(5); + WRITE_SIMD(6); + WRITE_SIMD(7); + WRITE_SIMD(8); + WRITE_SIMD(9); + WRITE_SIMD(10); + WRITE_SIMD(11); + WRITE_SIMD(12); + WRITE_SIMD(13); + WRITE_SIMD(14); + WRITE_SIMD(15); + WRITE_SIMD(16); + WRITE_SIMD(17); + WRITE_SIMD(18); + WRITE_SIMD(19); + WRITE_SIMD(20); + WRITE_SIMD(21); + WRITE_SIMD(22); + WRITE_SIMD(23); + WRITE_SIMD(24); + WRITE_SIMD(25); + WRITE_SIMD(26); + WRITE_SIMD(27); + WRITE_SIMD(28); + WRITE_SIMD(29); + WRITE_SIMD(30); + WRITE_SIMD(31); +} + +unsigned verify_simd_regs() { + uint64_t got_low = 0; + uint64_t got_high = 0; + uint64_t target = 0; + +#define VERIFY_SIMD(NUM) \ + do { \ + got_low = 0; \ + got_high = 0; \ + asm volatile("MOV %0, v" #NUM ".d[0]\n\t" \ + "MOV %1, v" #NUM ".d[1]\n\t" \ + : "=r"(got_low), "=r"(got_high)); \ + target = NUM + 1; \ + if ((got_low != target) || (got_high != target)) \ + return 1; \ + } while (0) + + VERIFY_SIMD(0); + VERIFY_SIMD(1); + VERIFY_SIMD(2); + VERIFY_SIMD(3); + VERIFY_SIMD(4); + VERIFY_SIMD(5); + VERIFY_SIMD(6); + VERIFY_SIMD(7); + VERIFY_SIMD(8); + VERIFY_SIMD(9); + VERIFY_SIMD(10); + VERIFY_SIMD(11); + VERIFY_SIMD(12); + VERIFY_SIMD(13); + VERIFY_SIMD(14); + VERIFY_SIMD(15); + VERIFY_SIMD(16); + VERIFY_SIMD(17); + VERIFY_SIMD(18); + VERIFY_SIMD(19); + VERIFY_SIMD(20); + VERIFY_SIMD(21); + VERIFY_SIMD(22); + VERIFY_SIMD(23); + VERIFY_SIMD(24); + VERIFY_SIMD(25); + VERIFY_SIMD(26); + VERIFY_SIMD(27); + VERIFY_SIMD(28); + VERIFY_SIMD(29); + VERIFY_SIMD(30); + VERIFY_SIMD(31); + + return 0; +} + +int main() { +#ifdef SSVE + asm volatile("msr s0_3_c4_c7_3, xzr" /*smstart*/); +#elif defined SVE + // Make the non-streaming SVE registers active. + asm volatile("cpy z0.b, p0/z, #1\n\t"); +#endif + // else test plain SIMD access. + + write_simd_regs(); + + return verify_simd_regs(); // Set a break point here. +} Index: lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py =================================================================== --- /dev/null +++ lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py @@ -0,0 +1,108 @@ +""" +Test that LLDB correctly reads and writes AArch64 SIMD registers in SVE, +streaming SVE and normal SIMD modes. + +There are a few operating modes and we use different strategies for each: +* Without SVE, in SIMD mode - read the SIMD regset. +* With SVE, but SVE is inactive - read the SVE regset, but get SIMD data from it. +* With SVE, SVE is active - read the SVE regset, use the bottom 128 bits of the + Z registers. +* With streaming SVE active - read the SSVE regset, use the bottom 128 bits of + the Z registers. + +This test exercises most of those.
+""" + +from enum import Enum +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class Mode(Enum): + SIMD = 0 + SVE = 1 + SSVE = 2 + +class SVESIMDRegistersTestCase(TestBase): + def get_build_flags(self, mode): + cflags = "-march=armv8-a+sve" + if mode == Mode.SSVE: + cflags += " -DSSVE" + elif mode == Mode.SVE: + cflags += " -DSVE" + # else we want SIMD mode, which processes start up in already. + + return {"CFLAGS_EXTRAS": cflags} + + def skip_if_needed(self, mode): + if (mode == Mode.SVE) and not self.isAArch64SVE(): + self.skipTest("SVE registers must be supported.") + + if (mode == Mode.SSVE) and not self.isAArch64SME(): + self.skipTest("SSVE registers must be supported.") + + def make_simd_value(self, n): + pad = " ".join(["0x00"] * 7) + return "{{0x{:02x} {} 0x{:02x} {}}}".format(n, pad, n, pad) + + def sve_simd_registers_impl(self, mode): + self.skip_if_needed(mode) + + self.build(dictionary=self.get_build_flags(mode)) + self.line = line_number("main.c", "// Set a break point here.") + + exe = self.getBuildArtifact("a.out") + self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) + + lldbutil.run_break_set_by_file_and_line( + self, "main.c", self.line, num_expected_locations=1 + ) + self.runCmd("run", RUN_SUCCEEDED) + + self.expect( + "thread backtrace", + STOPPED_DUE_TO_BREAKPOINT, + substrs=["stop reason = breakpoint 1."], + ) + + # These are 128 bit registers, so getting them from the API as unsigned + # values doesn't work. Check the command output instead. + for i in range(32): + self.expect("register read v{}".format(i), + substrs=[self.make_simd_value(i)]) + + # Write a new set of values. The kernel will move the program back to + # non-streaming mode here. + for i in range(32): + self.runCmd("register write v{} \"{}\"".format( + i, self.make_simd_value(i+1))) + + # Should be visible within lldb. + for i in range(32): + self.expect("register read v{}".format(i), + substrs=[self.make_simd_value(i+1)]) + + # The program should agree with lldb. 
+ self.expect("continue", substrs=["exited with status = 0"]) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_simd_registers_sve(self): + """Test read/write of SIMD registers when in SVE mode.""" + self.sve_simd_registers_impl(Mode.SVE) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_simd_registers_ssve(self): + """Test read/write of SIMD registers when in SSVE mode.""" + self.sve_simd_registers_impl(Mode.SSVE) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_simd_registers_simd(self): + """Test read/write of SIMD registers when in SIMD mode.""" + self.sve_simd_registers_impl(Mode.SIMD) \ No newline at end of file Index: lldb/test/API/commands/register/register/aarch64_sve_simd_registers/Makefile =================================================================== --- lldb/test/API/commands/register/register/aarch64_sve_simd_registers/Makefile +++ lldb/test/API/commands/register/register/aarch64_sve_simd_registers/Makefile @@ -1,5 +1,3 @@ C_SOURCES := main.c -CFLAGS_EXTRAS := -march=armv8-a+sve - include Makefile.rules Index: lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c =================================================================== --- lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c +++ lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c @@ -1,6 +1,15 @@ +#include <stdbool.h> #include <sys/prctl.h> +#ifndef PR_SME_SET_VL +#define PR_SME_SET_VL 63 +#endif + +#define SMSTART() asm volatile("msr s0_3_c4_c7_3, xzr" /*smstart*/) + void write_sve_regs() { + // We assume the smefa64 feature is present, which allows ffr access + // in streaming mode. asm volatile("setffr\n\t"); asm volatile("ptrue p0.b\n\t"); asm volatile("ptrue p1.h\n\t"); @@ -53,18 +62,84 @@ asm volatile("cpy z31.b, p15/z, #32\n\t"); } +// Set some different values so we can tell if lldb correctly returns to the set +// above after the expression is finished. 
+void write_sve_regs_expr() { + asm volatile("pfalse p0.b\n\t"); + asm volatile("wrffr p0.b\n\t"); + asm volatile("pfalse p1.b\n\t"); + asm volatile("pfalse p2.b\n\t"); + asm volatile("pfalse p3.b\n\t"); + asm volatile("ptrue p4.b\n\t"); + asm volatile("pfalse p5.b\n\t"); + asm volatile("pfalse p6.b\n\t"); + asm volatile("pfalse p7.b\n\t"); + asm volatile("pfalse p8.b\n\t"); + asm volatile("ptrue p9.b\n\t"); + asm volatile("pfalse p10.b\n\t"); + asm volatile("pfalse p11.b\n\t"); + asm volatile("pfalse p12.b\n\t"); + asm volatile("pfalse p13.b\n\t"); + asm volatile("ptrue p14.b\n\t"); + asm volatile("pfalse p15.b\n\t"); + + asm volatile("cpy z0.b, p0/z, #2\n\t"); + asm volatile("cpy z1.b, p5/z, #3\n\t"); + asm volatile("cpy z2.b, p10/z, #4\n\t"); + asm volatile("cpy z3.b, p15/z, #5\n\t"); + asm volatile("cpy z4.b, p0/z, #6\n\t"); + asm volatile("cpy z5.b, p5/z, #7\n\t"); + asm volatile("cpy z6.b, p10/z, #8\n\t"); + asm volatile("cpy z7.b, p15/z, #9\n\t"); + asm volatile("cpy z8.b, p0/z, #10\n\t"); + asm volatile("cpy z9.b, p5/z, #11\n\t"); + asm volatile("cpy z10.b, p10/z, #12\n\t"); + asm volatile("cpy z11.b, p15/z, #13\n\t"); + asm volatile("cpy z12.b, p0/z, #14\n\t"); + asm volatile("cpy z13.b, p5/z, #15\n\t"); + asm volatile("cpy z14.b, p10/z, #16\n\t"); + asm volatile("cpy z15.b, p15/z, #17\n\t"); + asm volatile("cpy z16.b, p0/z, #18\n\t"); + asm volatile("cpy z17.b, p5/z, #19\n\t"); + asm volatile("cpy z18.b, p10/z, #20\n\t"); + asm volatile("cpy z19.b, p15/z, #21\n\t"); + asm volatile("cpy z20.b, p0/z, #22\n\t"); + asm volatile("cpy z21.b, p5/z, #23\n\t"); + asm volatile("cpy z22.b, p10/z, #24\n\t"); + asm volatile("cpy z23.b, p15/z, #25\n\t"); + asm volatile("cpy z24.b, p0/z, #26\n\t"); + asm volatile("cpy z25.b, p5/z, #27\n\t"); + asm volatile("cpy z26.b, p10/z, #28\n\t"); + asm volatile("cpy z27.b, p15/z, #29\n\t"); + asm volatile("cpy z28.b, p0/z, #30\n\t"); + asm volatile("cpy z29.b, p5/z, #31\n\t"); + asm volatile("cpy z30.b, p10/z, #32\n\t"); + asm volatile("cpy z31.b, p15/z, #33\n\t"); +} + // This function will be called using jitted expression call. We change vector // length and write SVE registers. Our program context should restore to // orignal vector length and register values after expression evaluation. -int expr_eval_func() { - prctl(PR_SVE_SET_VL, 8 * 2); - write_sve_regs(); - prctl(PR_SVE_SET_VL, 8 * 4); - write_sve_regs(); +int expr_eval_func(bool streaming) { + int SET_VL_OPT = streaming ? PR_SME_SET_VL : PR_SVE_SET_VL; + prctl(SET_VL_OPT, 8 * 2); + // Note that doing a syscall brings you back to non-streaming mode, so we + // don't need to SMSTOP here. + if (streaming) + SMSTART(); + write_sve_regs_expr(); + prctl(SET_VL_OPT, 8 * 4); + if (streaming) + SMSTART(); + write_sve_regs_expr(); return 1; } int main() { +#ifdef START_SSVE + SMSTART(); +#endif write_sve_regs(); + return 0; // Set a break point here. } Index: lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py =================================================================== --- lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py +++ lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py @@ -2,11 +2,15 @@ Test the AArch64 SVE registers. 
""" +from enum import Enum import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil +class Mode(Enum): + SVE = 0 + SSVE = 1 class RegisterCommandsTestCase(TestBase): def check_sve_register_size(self, set, name, expected): @@ -61,20 +65,28 @@ self.expect("register read " + "ffr", substrs=[p_regs_value]) - @no_debug_info_test - @skipIf(archs=no_match(["aarch64"])) - @skipIf(oslist=no_match(["linux"])) - def test_sve_registers_configuration(self): - """Test AArch64 SVE registers size configuration.""" - self.build() + def get_build_flags(self, mode): + cflags = "-march=armv8-a+sve" + if mode == Mode.SSVE: + cflags += " -DSTART_SSVE" + return {"CFLAGS_EXTRAS": cflags} + + def skip_if_needed(self, mode): + if (mode == Mode.SVE) and not self.isAArch64SVE(): + self.skipTest("SVE registers must be supported.") + + if (mode == Mode.SSVE) and not self.isAArch64SME(): + self.skipTest("SSVE registers must be supported.") + + def sve_registers_configuration_impl(self, mode): + self.skip_if_needed(mode) + + self.build(dictionary=self.get_build_flags(mode)) self.line = line_number("main.c", "// Set a break point here.") exe = self.getBuildArtifact("a.out") self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) - if not self.isAArch64SVE(): - self.skipTest("SVE registers must be supported.") - lldbutil.run_break_set_by_file_and_line( self, "main.c", self.line, num_expected_locations=1 ) @@ -91,26 +103,17 @@ thread = process.GetThreadAtIndex(0) currentFrame = thread.GetFrameAtIndex(0) - has_sve = False - for registerSet in currentFrame.GetRegisters(): - if "Scalable Vector Extension Registers" in registerSet.GetName(): - has_sve = True - registerSets = process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters() - - sve_registers = registerSets.GetValueAtIndex(2) - - vg_reg = sve_registers.GetChildMemberWithName("vg") + sve_registers = registerSets.GetFirstValueByName("Scalable Vector Extension Registers") + self.assertTrue(sve_registers) vg_reg_value = sve_registers.GetChildMemberWithName("vg").GetValueAsUnsigned() z_reg_size = vg_reg_value * 8 - - p_reg_size = z_reg_size / 8 - for i in range(32): self.check_sve_register_size(sve_registers, "z%i" % (i), z_reg_size) + p_reg_size = z_reg_size / 8 for i in range(16): self.check_sve_register_size(sve_registers, "p%i" % (i), p_reg_size) @@ -119,17 +122,26 @@ @no_debug_info_test @skipIf(archs=no_match(["aarch64"])) @skipIf(oslist=no_match(["linux"])) - def test_sve_registers_read_write(self): - """Test AArch64 SVE registers read and write.""" - self.build() - self.line = line_number("main.c", "// Set a break point here.") + def test_sve_registers_configuration(self): + """Test AArch64 SVE registers size configuration.""" + self.sve_registers_configuration_impl(Mode.SVE) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_ssve_registers_configuration(self): + """Test AArch64 SSVE registers size configuration.""" + self.sve_registers_configuration_impl(Mode.SSVE) + + def sve_registers_read_write_impl(self, start_mode, eval_mode): + self.skip_if_needed(start_mode) + self.skip_if_needed(eval_mode) + self.build(dictionary=self.get_build_flags(start_mode)) exe = self.getBuildArtifact("a.out") self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) - if not self.isAArch64SVE(): - self.skipTest("SVE registers must be supported.") - + self.line = line_number("main.c", "// Set a break point here.") lldbutil.run_break_set_by_file_and_line( self, 
"main.c", self.line, num_expected_locations=1 ) @@ -143,34 +155,55 @@ target = self.dbg.GetSelectedTarget() process = target.GetProcess() - thread = process.GetThreadAtIndex(0) - currentFrame = thread.GetFrameAtIndex(0) - - has_sve = False - for registerSet in currentFrame.GetRegisters(): - if "Scalable Vector Extension Registers" in registerSet.GetName(): - has_sve = True registerSets = process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters() - - sve_registers = registerSets.GetValueAtIndex(2) - - vg_reg = sve_registers.GetChildMemberWithName("vg") + sve_registers = registerSets.GetFirstValueByName("Scalable Vector Extension Registers") + self.assertTrue(sve_registers) vg_reg_value = sve_registers.GetChildMemberWithName("vg").GetValueAsUnsigned() - z_reg_size = vg_reg_value * 8 - self.check_sve_regs_read(z_reg_size) # Evaluate simple expression and print function expr_eval_func address. self.expect("expression expr_eval_func", substrs=["= 0x"]) # Evaluate expression call function expr_eval_func. - self.expect_expr("expr_eval_func()", result_type="int", result_value="1") + self.expect_expr("expr_eval_func({})".format( + "true" if (eval_mode == Mode.SSVE) else "false"), result_type="int", + result_value="1") # We called a jitted function above which must not have changed SVE # vector length or register values. self.check_sve_regs_read(z_reg_size) self.check_sve_regs_read_after_write(z_reg_size) + + # The following tests all setup some register values then evaluate an + # expression. After the expression, the mode and register values should be + # the same as before. Finally they read/write some values in the registers. + # The only difference is the mode we start the program in, and the mode + # the expression function uses. + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_registers_expr_read_write_sve_sve(self): + self.sve_registers_read_write_impl(Mode.SVE, Mode.SVE) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_registers_expr_read_write_ssve_ssve(self): + self.sve_registers_read_write_impl(Mode.SSVE, Mode.SSVE) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_registers_expr_read_write_sve_ssve(self): + self.sve_registers_read_write_impl(Mode.SVE, Mode.SSVE) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_registers_expr_read_write_ssve_sve(self): + self.sve_registers_read_write_impl(Mode.SSVE, Mode.SVE) \ No newline at end of file Index: lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile =================================================================== --- lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile +++ lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile @@ -1,5 +1,3 @@ C_SOURCES := main.c -CFLAGS_EXTRAS := -march=armv8-a+sve - include Makefile.rules Index: lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/main.c =================================================================== --- lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/main.c +++ lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/main.c @@ -1,6 +1,12 @@ #include <pthread.h> #include <sys/prctl.h> +#ifndef 
PR_SME_SET_VL +#define PR_SME_SET_VL 63 +#endif + +#define SMSTART() asm volatile("msr s0_3_c4_c7_3, xzr" /*smstart*/) + static inline void write_sve_registers() { asm volatile("setffr\n\t"); asm volatile("ptrue p0.b\n\t"); @@ -54,26 +60,41 @@ asm volatile("cpy z31.b, p15/z, #32\n\t"); } +int SET_VL_OPT = PR_SVE_SET_VL; + void *threadX_func(void *x_arg) { - prctl(PR_SVE_SET_VL, 8 * 4); + prctl(SET_VL_OPT, 8 * 4); +#ifdef USE_SSVE + SMSTART(); +#endif write_sve_registers(); write_sve_registers(); // Thread X breakpoint 1 return NULL; // Thread X breakpoint 2 } void *threadY_func(void *y_arg) { - prctl(PR_SVE_SET_VL, 8 * 2); + prctl(SET_VL_OPT, 8 * 2); +#ifdef USE_SSVE + SMSTART(); +#endif write_sve_registers(); write_sve_registers(); // Thread Y breakpoint 1 return NULL; // Thread Y breakpoint 2 } int main() { +#ifdef USE_SSVE + SET_VL_OPT = PR_SME_SET_VL; +#endif + /* this variable is our reference to the second thread */ pthread_t x_thread, y_thread; /* Set vector length to 8 and write SVE registers values */ - prctl(PR_SVE_SET_VL, 8 * 8); + prctl(SET_VL_OPT, 8 * 8); +#ifdef USE_SSVE + SMSTART(); +#endif write_sve_registers(); /* create a second thread which executes with argument x */ Index: lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py =================================================================== --- lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py +++ lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py @@ -1,5 +1,6 @@ """ -Test the AArch64 SVE registers dynamic resize with multiple threads. +Test the AArch64 SVE and Streaming SVE (SSVE) registers dynamic resize with +multiple threads. This test assumes a minimum supported vector length (VL) of 256 bits and will test 512 bits if possible. We refer to "vg" which is the @@ -7,11 +8,15 @@ the same as a vg of 4. 
""" +from enum import Enum import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil +class Mode(Enum): + SVE = 0 + SSVE = 1 class RegisterCommandsTestCase(TestBase): def get_supported_vg(self): @@ -45,6 +50,9 @@ if not self.res.GetError(): supported_vg.append(vg) + self.runCmd("breakpoint delete 1") + self.runCmd("continue") + return supported_vg def check_sve_registers(self, vg_test_value): @@ -88,24 +96,24 @@ self.expect("register read ffr", substrs=[p_regs_value]) - @no_debug_info_test - @skipIf(archs=no_match(["aarch64"])) - @skipIf(oslist=no_match(["linux"])) - def test_sve_registers_dynamic_config(self): - """Test AArch64 SVE registers multi-threaded dynamic resize.""" - - if not self.isAArch64SVE(): + def run_sve_test(self, mode): + if (mode == Mode.SVE) and not self.isAArch64SVE(): self.skipTest("SVE registers must be supported.") + if (mode == Mode.SSVE) and not self.isAArch64SME(): + self.skipTest("Streaming SVE registers must be supported.") + + cflags = "-march=armv8-a+sve -lpthread" + if mode == Mode.SSVE: + cflags += " -DUSE_SSVE" + self.build(dictionary={"CFLAGS_EXTRAS": cflags}) + self.build() supported_vg = self.get_supported_vg() if not (2 in supported_vg and 4 in supported_vg): self.skipTest("Not all required SVE vector lengths are supported.") - exe = self.getBuildArtifact("a.out") - self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) - main_thread_stop_line = line_number("main.c", "// Break in main thread") lldbutil.run_break_set_by_file_and_line(self, "main.c", main_thread_stop_line) @@ -176,3 +184,17 @@ elif stopped_at_line_number == thY_break_line2: self.runCmd("thread select %d" % (idx + 1)) self.check_sve_registers(4) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_sve_registers_dynamic_config(self): + """Test AArch64 SVE registers multi-threaded dynamic resize.""" + self.run_sve_test(Mode.SVE) + + @no_debug_info_test + @skipIf(archs=no_match(["aarch64"])) + @skipIf(oslist=no_match(["linux"])) + def test_ssve_registers_dynamic_config(self): + """Test AArch64 SSVE registers multi-threaded dynamic resize.""" + self.run_sve_test(Mode.SSVE) Index: lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h =================================================================== --- lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h +++ lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h @@ -15,7 +15,7 @@ #include "lldb/lldb-private.h" #include <map> -enum class SVEState { Unknown, Disabled, FPSIMD, Full }; +enum class SVEState : uint8_t { Unknown, Disabled, FPSIMD, Full, Streaming }; class RegisterInfoPOSIX_arm64 : public lldb_private::RegisterInfoAndSetInterface { @@ -26,9 +26,10 @@ enum { eRegsetMaskDefault = 0, eRegsetMaskSVE = 1, - eRegsetMaskPAuth = 2, - eRegsetMaskMTE = 4, - eRegsetMaskTLS = 8, + eRegsetMaskSSVE = 2, + eRegsetMaskPAuth = 4, + eRegsetMaskMTE = 8, + eRegsetMaskTLS = 16, eRegsetMaskDynamic = ~1, }; @@ -115,6 +116,7 @@ } bool IsSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSVE); } + bool IsSSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSSVE); } bool IsPAuthEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); } bool IsMTEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); } Index: lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp =================================================================== --- 
lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp +++ lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp @@ -212,7 +212,7 @@ // dynamic register set like MTE, Pointer Authentication regset then we need // to create dynamic register infos and regset array. Push back all optional // register infos and regset and calculate register offsets accordingly. - if (m_opt_regsets.AllSet(eRegsetMaskSVE)) { + if (m_opt_regsets.AnySet(eRegsetMaskSVE | eRegsetMaskSSVE)) { m_register_info_p = g_register_infos_arm64_sve_le; m_register_info_count = sve_ffr + 1; m_per_regset_regnum_range[m_register_set_count++] = Index: lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h =================================================================== --- lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h +++ lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h @@ -149,7 +149,7 @@ void *GetTLSTPIDR() { return &m_tls_tpidr_reg; } - void *GetSVEBuffer() { return m_sve_ptrace_payload.data(); }; + void *GetSVEBuffer() { return m_sve_ptrace_payload.data(); } size_t GetSVEHeaderSize() { return sizeof(m_sve_header); } @@ -157,6 +157,8 @@ size_t GetSVEBufferSize() { return m_sve_ptrace_payload.size(); } + unsigned GetSVERegSet(); + size_t GetMTEControlSize() { return sizeof(m_mte_ctrl_reg); } size_t GetTLSTPIDRSize() { return sizeof(m_tls_tpidr_reg); } Index: lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp =================================================================== --- lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp +++ lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp @@ -36,6 +36,11 @@ #define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension */ #endif +#ifndef NT_ARM_SSVE +#define NT_ARM_SSVE \ + 0x40b /* ARM Scalable Matrix Extension, Streaming SVE mode */ +#endif + #ifndef NT_ARM_PAC_MASK #define NT_ARM_PAC_MASK 0x406 /* Pointer authentication code masks */ #endif @@ -71,9 +76,20 @@ if (NativeProcessLinux::PtraceWrapper(PTRACE_GETREGSET, native_thread.GetID(), ®set, &ioVec, sizeof(sve_header)) - .Success()) + .Success()) { opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskSVE); + // We may also have the Scalable Matrix Extension (SME) which adds a + // streaming SVE mode. + ioVec.iov_len = sizeof(sve_header); + regset = NT_ARM_SSVE; + if (NativeProcessLinux::PtraceWrapper(PTRACE_GETREGSET, + native_thread.GetID(), ®set, + &ioVec, sizeof(sve_header)) + .Success()) + opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskSSVE); + } + NativeProcessLinux &process = native_thread.GetProcess(); std::optional<uint64_t> auxv_at_hwcap = @@ -134,7 +150,7 @@ m_mte_ctrl_is_valid = false; m_tls_tpidr_is_valid = false; - if (GetRegisterInfo().IsSVEEnabled()) + if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled()) m_sve_state = SVEState::Unknown; else m_sve_state = SVEState::Disabled; @@ -203,25 +219,27 @@ assert(offset < GetFPRSize()); src = (uint8_t *)GetFPRBuffer() + offset; } else { - // SVE enabled, we will read and cache SVE ptrace data + // SVE or SSVE enabled, we will read and cache SVE ptrace data. + // In SIMD or Full mode, the data comes from the SVE regset. In streaming + // mode it comes from the streaming regset. 
error = ReadAllSVE(); if (error.Fail()) return error; // FPSR and FPCR will be located right after Z registers in - // SVEState::FPSIMD while in SVEState::Full they will be located at the - // end of register data after an alignment correction based on currently - // selected vector length. + // SVEState::FPSIMD while in SVEState::Full or SVEState::Streaming they + // will be located at the end of register data after an alignment + // correction based on currently selected vector length. uint32_t sve_reg_num = LLDB_INVALID_REGNUM; if (reg == GetRegisterInfo().GetRegNumFPSR()) { sve_reg_num = reg; - if (m_sve_state == SVEState::Full) + if (m_sve_state == SVEState::Full || m_sve_state == SVEState::Streaming) offset = sve::PTraceFPSROffset(sve::vq_from_vl(m_sve_header.vl)); else if (m_sve_state == SVEState::FPSIMD) offset = sve::ptrace_fpsimd_offset + (32 * 16); } else if (reg == GetRegisterInfo().GetRegNumFPCR()) { sve_reg_num = reg; - if (m_sve_state == SVEState::Full) + if (m_sve_state == SVEState::Full || m_sve_state == SVEState::Streaming) offset = sve::PTraceFPCROffset(sve::vq_from_vl(m_sve_header.vl)); else if (m_sve_state == SVEState::FPSIMD) offset = sve::ptrace_fpsimd_offset + (32 * 16) + 4; @@ -344,25 +362,25 @@ return WriteFPR(); } else { - // SVE enabled, we will read and cache SVE ptrace data + // SVE enabled, we will read and cache SVE ptrace data. error = ReadAllSVE(); if (error.Fail()) return error; // FPSR and FPCR will be located right after Z registers in - // SVEState::FPSIMD while in SVEState::Full they will be located at the - // end of register data after an alignment correction based on currently - // selected vector length. + // SVEState::FPSIMD while in SVEState::Full or SVEState::Streaming they + // will be located at the end of register data after an alignment + // correction based on currently selected vector length. uint32_t sve_reg_num = LLDB_INVALID_REGNUM; if (reg == GetRegisterInfo().GetRegNumFPSR()) { sve_reg_num = reg; - if (m_sve_state == SVEState::Full) + if (m_sve_state == SVEState::Full || m_sve_state == SVEState::Streaming) offset = sve::PTraceFPSROffset(sve::vq_from_vl(m_sve_header.vl)); else if (m_sve_state == SVEState::FPSIMD) offset = sve::ptrace_fpsimd_offset + (32 * 16); } else if (reg == GetRegisterInfo().GetRegNumFPCR()) { sve_reg_num = reg; - if (m_sve_state == SVEState::Full) + if (m_sve_state == SVEState::Full || m_sve_state == SVEState::Streaming) offset = sve::PTraceFPCROffset(sve::vq_from_vl(m_sve_header.vl)); else if (m_sve_state == SVEState::FPSIMD) offset = sve::ptrace_fpsimd_offset + (32 * 16) + 4; @@ -479,9 +497,10 @@ Status NativeRegisterContextLinux_arm64::ReadAllRegisterValues( lldb::WritableDataBufferSP &data_sp) { - // AArch64 register data must contain GPRs, either FPR or SVE registers - // and optional MTE register. Pointer Authentication (PAC) registers are - // read-only and will be skiped. + // AArch64 register data must contain GPRs and either FPR or SVE registers. + // SVE registers can be non-streaming (aka SVE) or streaming (aka SSVE). + // Finally an optional MTE register. Pointer Authentication (PAC) registers + // are read-only and will be skiped. // In order to create register data checkpoint we first read all register // values if not done already and calculate total size of register set data. @@ -495,8 +514,10 @@ return error; // If SVE is enabled we need not copy FPR separately. 
- if (GetRegisterInfo().IsSVEEnabled()) { + if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled()) { reg_data_byte_size += GetSVEBufferSize(); + // Also store the current SVE mode. + reg_data_byte_size += sizeof(uint32_t); error = ReadAllSVE(); } else { reg_data_byte_size += GetFPRSize(); @@ -524,7 +545,9 @@ ::memcpy(dst, GetGPRBuffer(), GetGPRBufferSize()); dst += GetGPRBufferSize(); - if (GetRegisterInfo().IsSVEEnabled()) { + if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled()) { + *dst = static_cast<uint8_t>(m_sve_state); + dst += sizeof(m_sve_state); ::memcpy(dst, GetSVEBuffer(), GetSVEBufferSize()); dst += GetSVEBufferSize(); } else { @@ -594,6 +617,10 @@ (data_sp->GetByteSize() > (reg_data_min_size + GetSVEHeaderSize())); if (contains_sve_reg_data) { + // Restore to the correct mode, streaming or not. + m_sve_state = static_cast<SVEState>(*src); + src += sizeof(m_sve_state); + // We have SVE register data first write SVE header. ::memcpy(GetSVEHeader(), src, GetSVEHeaderSize()); if (!sve::vl_valid(m_sve_header.vl)) { @@ -824,6 +851,10 @@ ConfigureRegisterContext(); } +unsigned NativeRegisterContextLinux_arm64::GetSVERegSet() { + return m_sve_state == SVEState::Streaming ? NT_ARM_SSVE : NT_ARM_SVE; +} + Status NativeRegisterContextLinux_arm64::ReadSVEHeader() { Status error; @@ -834,7 +865,7 @@ ioVec.iov_base = GetSVEHeader(); ioVec.iov_len = GetSVEHeaderSize(); - error = ReadRegisterSet(&ioVec, GetSVEHeaderSize(), NT_ARM_SVE); + error = ReadRegisterSet(&ioVec, GetSVEHeaderSize(), GetSVERegSet()); if (error.Success()) m_sve_header_is_valid = true; @@ -875,12 +906,11 @@ m_sve_header_is_valid = false; m_fpu_is_valid = false; - return WriteRegisterSet(&ioVec, GetSVEHeaderSize(), NT_ARM_SVE); + return WriteRegisterSet(&ioVec, GetSVEHeaderSize(), GetSVERegSet()); } Status NativeRegisterContextLinux_arm64::ReadAllSVE() { Status error; - if (m_sve_buffer_is_valid) return error; @@ -888,7 +918,7 @@ ioVec.iov_base = GetSVEBuffer(); ioVec.iov_len = GetSVEBufferSize(); - error = ReadRegisterSet(&ioVec, GetSVEBufferSize(), NT_ARM_SVE); + error = ReadRegisterSet(&ioVec, GetSVEBufferSize(), GetSVERegSet()); if (error.Success()) m_sve_buffer_is_valid = true; @@ -912,7 +942,7 @@ m_sve_header_is_valid = false; m_fpu_is_valid = false; - return WriteRegisterSet(&ioVec, GetSVEBufferSize(), NT_ARM_SVE); + return WriteRegisterSet(&ioVec, GetSVEBufferSize(), GetSVERegSet()); } Status NativeRegisterContextLinux_arm64::ReadMTEControl() { @@ -985,21 +1015,43 @@ void NativeRegisterContextLinux_arm64::ConfigureRegisterContext() { // ConfigureRegisterContext gets called from InvalidateAllRegisters - // on every stop and configures SVE vector length. + // on every stop and configures SVE vector length and whether we are in + // streaming SVE mode. // If m_sve_state is set to SVEState::Disabled on first stop, code below will // be deemed non operational for the lifetime of current process. if (!m_sve_header_is_valid && m_sve_state != SVEState::Disabled) { + // If we have SVE we may also have the SVE streaming mode that SME added. + // We can read the header of either mode, but only the active mode will + // have valid register data. + + // Check whether SME is present and the streaming SVE mode is active. + m_sve_header_is_valid = false; + m_sve_buffer_is_valid = false; + m_sve_state = SVEState::Streaming; Status error = ReadSVEHeader(); - if (error.Success()) { - // If SVE is enabled thread can switch between SVEState::FPSIMD and - // SVEState::Full on every stop. 
- if ((m_sve_header.flags & sve::ptrace_regs_mask) == - sve::ptrace_regs_fpsimd) - m_sve_state = SVEState::FPSIMD; - else if ((m_sve_header.flags & sve::ptrace_regs_mask) == - sve::ptrace_regs_sve) - m_sve_state = SVEState::Full; + // Streaming mode is active if the header has the SVE active flag set. + if (!(error.Success() && ((m_sve_header.flags & sve::ptrace_regs_mask) == + sve::ptrace_regs_sve))) { + // Non-streaming might be active instead. + m_sve_header_is_valid = false; + m_sve_buffer_is_valid = false; + m_sve_state = SVEState::Full; + error = ReadSVEHeader(); + if (error.Success()) { + // If SVE is enabled thread can switch between SVEState::FPSIMD and + // SVEState::Full on every stop. + if ((m_sve_header.flags & sve::ptrace_regs_mask) == + sve::ptrace_regs_fpsimd) + m_sve_state = SVEState::FPSIMD; + // Else we are in SVEState::Full. + } else { + m_sve_state = SVEState::Disabled; + } + } + + if (m_sve_state == SVEState::Full || m_sve_state == SVEState::FPSIMD || + m_sve_state == SVEState::Streaming) { // On every stop we configure SVE vector length by calling // ConfigureVectorLength regardless of current SVEState of this thread. uint32_t vq = RegisterInfoPOSIX_arm64::eVectorQuadwordAArch64SVE; @@ -1025,7 +1077,9 @@ const uint32_t reg = reg_info->kinds[lldb::eRegisterKindLLDB]; sve_reg_offset = sve::ptrace_fpsimd_offset + (reg - GetRegisterInfo().GetRegNumSVEZ0()) * 16; - } else if (m_sve_state == SVEState::Full) { + // Between non-streaming and streaming mode, the layout is identical. + } else if (m_sve_state == SVEState::Full || + m_sve_state == SVEState::Streaming) { uint32_t sve_z0_offset = GetGPRSize() + 16; sve_reg_offset = sve::SigRegsOffset() + reg_info->byte_offset - sve_z0_offset;
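As an aside on the detection logic added to NativeRegisterContextLinux_arm64.cpp above: probing for the streaming regset is the same single-header PTRACE_GETREGSET read used for plain SVE, just with NT_ARM_SSVE instead of NT_ARM_SVE. A rough standalone sketch (editor's illustration, not LLDB code; AArch64 Linux only, HaveRegset is a made-up helper, and the target thread must already be ptrace-attached and stopped):

    #include <asm/ptrace.h>  // struct user_sve_header on AArch64
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/uio.h>

    // Fallback values, mirroring the guards in the patch itself.
    #ifndef NT_ARM_SVE
    #define NT_ARM_SVE 0x405
    #endif
    #ifndef NT_ARM_SSVE
    #define NT_ARM_SSVE 0x40b
    #endif

    // Returns true if the kernel exposes the given regset for this thread.
    // Only the header is requested; a successful read is enough to prove the
    // regset exists.
    static bool HaveRegset(pid_t tid, int regset) {
      struct user_sve_header header;
      struct iovec iov = {&header, sizeof(header)};
      return ptrace(PTRACE_GETREGSET, tid, (void *)(long)regset, &iov) == 0;
    }

    // Usage: streaming SVE is only checked once plain SVE is known to exist,
    // which matches how the patch sets eRegsetMaskSSVE inside the SVE branch.
    //   bool has_sve  = HaveRegset(tid, NT_ARM_SVE);
    //   bool has_ssve = has_sve && HaveRegset(tid, NT_ARM_SSVE);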
_______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits