After modifying truncate_error_folio(), we expect memory_failure() to return 0 instead of MF_FAILED. We also want to make sure that the signaling behavior of memory_failure() is unchanged.
Test that memory_failure() returns 0 for guest_memfd, where .error_remove_folio() is handled by not actually truncating, and returning MF_DELAYED. In addition, test that SIGBUS signaling behavior is not changed before and after this modification. There are two kinds of guest memory failure injection: madvise and debugfs. When memory failure is injected using madvise, the MF_ACTION_REQUIRED flag is set, and the page is mapped and dirty, the process should get a SIGBUS. When memory failure is injected using debugfs, the KILL_EARLY machine check memory corruption kill policy is set, and the page is mapped and dirty, the process should get a SIGBUS. Co-developed-by: Ackerley Tng <[email protected]> Signed-off-by: Ackerley Tng <[email protected]> Signed-off-by: Lisa Wang <[email protected]> --- tools/testing/selftests/kvm/Makefile.kvm | 2 + .../kvm/guest_memfd_memory_failure_test.c | 336 +++++++++++++++++++++ 2 files changed, 338 insertions(+) diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index fdec90e85467..9409ded6cbce 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -146,6 +146,7 @@ TEST_GEN_PROGS_x86 += access_tracking_perf_test TEST_GEN_PROGS_x86 += coalesced_io_test TEST_GEN_PROGS_x86 += dirty_log_perf_test TEST_GEN_PROGS_x86 += guest_memfd_test +TEST_GEN_PROGS_x86 += guest_memfd_memory_failure_test TEST_GEN_PROGS_x86 += hardware_disable_test TEST_GEN_PROGS_x86 += memslot_modification_stress_test TEST_GEN_PROGS_x86 += memslot_perf_test @@ -186,6 +187,7 @@ TEST_GEN_PROGS_arm64 += coalesced_io_test TEST_GEN_PROGS_arm64 += dirty_log_perf_test TEST_GEN_PROGS_arm64 += get-reg-list TEST_GEN_PROGS_arm64 += guest_memfd_test +TEST_GEN_PROGS_arm64 += guest_memfd_memory_failure_test TEST_GEN_PROGS_arm64 += memslot_modification_stress_test TEST_GEN_PROGS_arm64 += memslot_perf_test TEST_GEN_PROGS_arm64 += mmu_stress_test diff --git 
a/tools/testing/selftests/kvm/guest_memfd_memory_failure_test.c b/tools/testing/selftests/kvm/guest_memfd_memory_failure_test.c new file mode 100644 index 000000000000..6c8032d390ae --- /dev/null +++ b/tools/testing/selftests/kvm/guest_memfd_memory_failure_test.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright Intel Corporation, 2026 + * + * Author: Ackerley Tng <[email protected]> + * Author: Lisa Wang <[email protected]> + */ + +#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include <fcntl.h> +#include <linux/prctl.h> +#include <sys/prctl.h> + +#include <linux/bitmap.h> +#include <linux/falloc.h> +#include <linux/sizes.h> +#include <sys/mman.h> +#include <sys/types.h> +#include "kvm_util.h" +#include "test_util.h" +#include "kselftest_harness.h" + +static size_t page_size, total_size; + +enum memory_failure_injection_method { + MF_INJECT_DEBUGFS, + MF_INJECT_MADVISE, +}; + +FIXTURE(guest_memfd_failure) { + struct kvm_vm *vm; + int fd; + unsigned long poisoned_pfn; +}; + +FIXTURE_VARIANT(guest_memfd_failure) { + enum memory_failure_injection_method method; + int kill_config; + bool map_page; + bool dirty_page; + bool sigbus_expected; + int return_code; +}; + +FIXTURE_VARIANT_ADD(guest_memfd_failure, debugfs_early_dirty) { + .method = MF_INJECT_DEBUGFS, + .kill_config = PR_MCE_KILL_EARLY, + .map_page = true, + .dirty_page = true, + .sigbus_expected = true, + .return_code = 0, +}; + +FIXTURE_VARIANT_ADD(guest_memfd_failure, debugfs_early_clean) { + .method = MF_INJECT_DEBUGFS, + .kill_config = PR_MCE_KILL_EARLY, + .map_page = true, + .dirty_page = false, + .sigbus_expected = false, + .return_code = 0, +}; + +FIXTURE_VARIANT_ADD(guest_memfd_failure, debugfs_early_unmapped) { + .method = MF_INJECT_DEBUGFS, + .kill_config = PR_MCE_KILL_EARLY, + .map_page = false, + .dirty_page = true, + .sigbus_expected = false, + .return_code = 0, +}; + 
+FIXTURE_VARIANT_ADD(guest_memfd_failure, debugfs_late_dirty) { + .method = MF_INJECT_DEBUGFS, + .kill_config = PR_MCE_KILL_LATE, + .map_page = true, + .dirty_page = true, + .sigbus_expected = false, + .return_code = 0, +}; + +FIXTURE_VARIANT_ADD(guest_memfd_failure, debugfs_late_clean) { + .method = MF_INJECT_DEBUGFS, + .kill_config = PR_MCE_KILL_LATE, + .map_page = true, + .dirty_page = false, + .sigbus_expected = false, + .return_code = 0, +}; + +FIXTURE_VARIANT_ADD(guest_memfd_failure, debugfs_late_unmapped) { + .method = MF_INJECT_DEBUGFS, + .kill_config = PR_MCE_KILL_LATE, + .map_page = false, + .dirty_page = true, + .sigbus_expected = false, + .return_code = 0, +}; + +FIXTURE_VARIANT_ADD(guest_memfd_failure, madvise_dirty) { + .method = MF_INJECT_MADVISE, + .kill_config = PR_MCE_KILL_DEFAULT, + .map_page = true, + .dirty_page = true, + .sigbus_expected = true, + .return_code = 0, +}; + +FIXTURE_VARIANT_ADD(guest_memfd_failure, madvise_clean) { + .method = MF_INJECT_MADVISE, + .kill_config = PR_MCE_KILL_DEFAULT, + .map_page = true, + .dirty_page = false, + .sigbus_expected = false, + .return_code = 0, +}; + +FIXTURE_SETUP(guest_memfd_failure) +{ + self->vm = NULL; + self->fd = -1; + self->poisoned_pfn = 0; +} + +static void write_memory_failure(unsigned long pfn, bool mark, int expected_return_code) +{ + char path[PATH_MAX]; + char *filename; + char buf[20]; + int ret; + int len; + int fd; + + filename = mark ? "corrupt-pfn" : "unpoison-pfn"; + snprintf(path, PATH_MAX, "/sys/kernel/debug/hwpoison/%s", filename); + + fd = open(path, O_WRONLY); + TEST_ASSERT(fd >= 0, "Failed to open %s.", path); + + len = snprintf(buf, sizeof(buf), "0x%lx\n", pfn); + if (len < 0 || (unsigned int)len >= sizeof(buf)) + TEST_ASSERT(0, "snprintf failed or truncated."); + + ret = write(fd, buf, len); + if (expected_return_code == 0) { + /* + * If the memory_failure() returns 0, write() should be successful, + * which returns how many bytes it writes. 
+ */ + TEST_ASSERT(ret > 0, "Writing memory failure (path: %s) failed: %s", path, + strerror(errno)); + } else { + TEST_ASSERT_EQ(ret, -1); + /* errno is memory_failure() return code. */ + TEST_ASSERT_EQ(errno, expected_return_code); + } + + close(fd); +} + +static void mark_memory_failure(unsigned long pfn, int expected_return_code) +{ + write_memory_failure(pfn, true, expected_return_code); +} + +static void unmark_memory_failure(unsigned long pfn, int expected_return_code) +{ + write_memory_failure(pfn, false, expected_return_code); +} + +FIXTURE_TEARDOWN(guest_memfd_failure) +{ + if (self->fd >= 0) + close(self->fd); + if (self->vm) + kvm_vm_free(self->vm); + if (self->poisoned_pfn) + unmark_memory_failure(self->poisoned_pfn, 0); +} + +static unsigned long addr_to_pfn(void *addr) +{ + const uint64_t pagemap_pfn_mask = BIT(54) - 1; + const uint64_t pagemap_page_present = BIT(63); + uint64_t page_info; + ssize_t n_bytes; + int pagemap_fd; + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + TEST_ASSERT(pagemap_fd >= 0, "Opening pagemap should succeed."); + + n_bytes = pread(pagemap_fd, &page_info, 8, (uint64_t)addr / page_size * 8); + TEST_ASSERT(n_bytes == 8, "pread of pagemap failed. n_bytes=%ld", n_bytes); + + close(pagemap_fd); + + TEST_ASSERT(page_info & pagemap_page_present, "The page for addr should be present"); + return page_info & pagemap_pfn_mask; +} + +static void do_test_memory_failure(FIXTURE_DATA(guest_memfd_failure) * self, + const FIXTURE_VARIANT(guest_memfd_failure) * variant) +{ + unsigned long memory_failure_pfn; + char *memory_failure_addr; + char *mem; + int ret; + + mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, self->fd, 0); + TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed."); + memory_failure_addr = mem + page_size; + if (variant->dirty_page) + *memory_failure_addr = 'A'; + else + READ_ONCE(*memory_failure_addr); + + /* Fault in page to read pfn, then unmap page for testing if needed. 
*/ + memory_failure_pfn = addr_to_pfn(memory_failure_addr); + if (!variant->map_page) + madvise(memory_failure_addr, page_size, MADV_DONTNEED); + + ret = prctl(PR_MCE_KILL, PR_MCE_KILL_SET, variant->kill_config, 0, 0); + TEST_ASSERT_EQ(ret, 0); + + self->poisoned_pfn = memory_failure_pfn; + + ret = 0; + switch (variant->method) { + case MF_INJECT_DEBUGFS: { + /* DEBUGFS injection handles return_code test inside the mark_memory_failure(). */ + if (variant->sigbus_expected) + TEST_EXPECT_SIGBUS(mark_memory_failure(memory_failure_pfn, + variant->return_code)); + else + mark_memory_failure(memory_failure_pfn, variant->return_code); + break; + } + case MF_INJECT_MADVISE: { + /* + * MADV_HWPOISON uses get_user_pages() so the page will always + * be faulted in at the point of memory_failure() + */ + if (variant->sigbus_expected) + TEST_EXPECT_SIGBUS(ret = madvise(memory_failure_addr, + page_size, MADV_HWPOISON)); + else + ret = madvise(memory_failure_addr, page_size, MADV_HWPOISON); + + if (variant->return_code == 0) + TEST_ASSERT(ret == variant->return_code, "Memory failure failed. Errno: %s", + strerror(errno)); + else { + /* errno is memory_failure() return code. 
*/ + TEST_ASSERT_EQ(errno, variant->return_code); + } + break; + } + default: + TEST_FAIL("Unhandled memory failure injection method %d.", variant->method); + } + + TEST_EXPECT_SIGBUS(READ_ONCE(*memory_failure_addr)); + TEST_EXPECT_SIGBUS(*memory_failure_addr = 'A'); + + ret = munmap(mem, total_size); + TEST_ASSERT(!ret, "munmap() should succeed."); + + ret = fallocate(self->fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, total_size); + TEST_ASSERT(!ret, "Truncate the entire file (cleanup) should succeed."); + + ret = prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_DEFAULT, 0, 0); + TEST_ASSERT_EQ(ret, 0); + + unmark_memory_failure(memory_failure_pfn, 0); + self->poisoned_pfn = 0; +} + +TEST_F(guest_memfd_failure, test_memory_failure) +{ + unsigned long vm_types, vm_type; + + total_size = page_size * 4; + + vm_types = kvm_check_cap(KVM_CAP_VM_TYPES); + if (!vm_types) + vm_types = BIT(VM_TYPE_DEFAULT); + + for_each_set_bit(vm_type, &vm_types, BITS_PER_TYPE(vm_types)) { + uint64_t flags; + + self->vm = vm_create_barebones_type(vm_type); + flags = vm_check_cap(self->vm, KVM_CAP_GUEST_MEMFD_FLAGS); + if (!(flags & GUEST_MEMFD_FLAG_INIT_SHARED)) { + kvm_vm_free(self->vm); + self->vm = NULL; + continue; + } + + self->fd = vm_create_guest_memfd(self->vm, total_size, + GUEST_MEMFD_FLAG_MMAP | GUEST_MEMFD_FLAG_INIT_SHARED); + ASSERT_GE(self->fd, 0) TH_LOG("vm_create_guest_memfd failed"); + + do_test_memory_failure(self, variant); + + close(self->fd); + self->fd = -1; + kvm_vm_free(self->vm); + self->vm = NULL; + } +} + +static bool can_inject_memory_failure(void) +{ + int fd; + + fd = open("/sys/kernel/debug/hwpoison/corrupt-pfn", O_WRONLY); + if (fd < 0) + return false; + + close(fd); + return true; +} + +int main(int argc, char **argv) +{ + TEST_REQUIRE(kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED); + __TEST_REQUIRE(can_inject_memory_failure(), + "Insufficient permissions to access hwpoison debugfs (requires CAP_SYS_ADMIN / root))"); + 
page_size = getpagesize(); + + return test_harness_run(argc, argv); +} -- 2.54.0.1013.g208068f2d8-goog

