On Fri, May 09, 2025 at 05:30:32PM +0200, David Hildenbrand wrote: > Let's test some basic functionality using /dev/mem. These tests will > implicitly cover some PAT (Page Attribute Table) handling on x86. > > These tests will only run when /dev/mem access to the first two pages > in physical address space is possible and allowed; otherwise, the tests > are skipped. > > On current x86-64 with PAT inside a VM, all tests pass: > > TAP version 13 > 1..6 > # Starting 6 tests from 1 test cases. > # RUN pfnmap.madvise_disallowed ... > # OK pfnmap.madvise_disallowed > ok 1 pfnmap.madvise_disallowed > # RUN pfnmap.munmap_split ... > # OK pfnmap.munmap_split > ok 2 pfnmap.munmap_split > # RUN pfnmap.mremap_fixed ... > # OK pfnmap.mremap_fixed > ok 3 pfnmap.mremap_fixed > # RUN pfnmap.mremap_shrink ... > # OK pfnmap.mremap_shrink > ok 4 pfnmap.mremap_shrink > # RUN pfnmap.mremap_expand ... > # OK pfnmap.mremap_expand > ok 5 pfnmap.mremap_expand > # RUN pfnmap.fork ... > # OK pfnmap.fork > ok 6 pfnmap.fork > # PASSED: 6 / 6 tests passed. > # Totals: pass:6 fail:0 xfail:0 xpass:0 skip:0 error:0 > > However, we are able to trigger: > > [ 27.888251] x86/PAT: pfnmap:1790 freeing invalid memtype [mem > 0x00000000-0x00000fff] > > There are probably more things worth testing in the future, such as > MAP_PRIVATE handling. But this set of tests is sufficient to cover most of > the things we will rework regarding PAT handling. > > Cc: Andrew Morton <a...@linux-foundation.org> > Cc: Shuah Khan <sh...@kernel.org> > Cc: Lorenzo Stoakes <lorenzo.stoa...@oracle.com> > Cc: Ingo Molnar <mi...@redhat.com> > Cc: Peter Xu <pet...@redhat.com> > Cc: Dev Jain <dev.j...@arm.com> > Signed-off-by: David Hildenbrand <da...@redhat.com>
Nice, big improvement! Reviewed-by: Lorenzo Stoakes <lorenzo.stoa...@oracle.com> > --- > > Hopefully I didn't miss any review feedback. All good afaict! :) the only thing would be to make the siglongjmp() conditional but it's not a big deal. > > v1 -> v2: > * Rewrite using kselftest_harness, which simplifies a lot of things > * Add to .gitignore and run_vmtests.sh > * Register signal handler on demand > * Use volatile trick to force a read (not factoring out FORCE_READ just yet) > * Drop mprotect() test case > * Add some more comments why we test certain things > * Use NULL for mmap() first parameter instead of 0 > * Smaller fixes > > --- > tools/testing/selftests/mm/.gitignore | 1 + > tools/testing/selftests/mm/Makefile | 1 + > tools/testing/selftests/mm/pfnmap.c | 196 ++++++++++++++++++++++ > tools/testing/selftests/mm/run_vmtests.sh | 4 + > 4 files changed, 202 insertions(+) > create mode 100644 tools/testing/selftests/mm/pfnmap.c > > diff --git a/tools/testing/selftests/mm/.gitignore > b/tools/testing/selftests/mm/.gitignore > index 91db34941a143..824266982aa36 100644 > --- a/tools/testing/selftests/mm/.gitignore > +++ b/tools/testing/selftests/mm/.gitignore > @@ -20,6 +20,7 @@ mremap_test > on-fault-limit > transhuge-stress > pagemap_ioctl > +pfnmap > *.tmp* > protection_keys > protection_keys_32 > diff --git a/tools/testing/selftests/mm/Makefile > b/tools/testing/selftests/mm/Makefile > index ad4d6043a60f0..ae6f994d3add7 100644 > --- a/tools/testing/selftests/mm/Makefile > +++ b/tools/testing/selftests/mm/Makefile > @@ -84,6 +84,7 @@ TEST_GEN_FILES += mremap_test > TEST_GEN_FILES += mseal_test > TEST_GEN_FILES += on-fault-limit > TEST_GEN_FILES += pagemap_ioctl > +TEST_GEN_FILES += pfnmap > TEST_GEN_FILES += thuge-gen > TEST_GEN_FILES += transhuge-stress > TEST_GEN_FILES += uffd-stress > diff --git a/tools/testing/selftests/mm/pfnmap.c > b/tools/testing/selftests/mm/pfnmap.c > new file mode 100644 > index 0000000000000..8a9d19b6020c7 > --- /dev/null > +++ 
b/tools/testing/selftests/mm/pfnmap.c > @@ -0,0 +1,196 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Basic VM_PFNMAP tests relying on mmap() of '/dev/mem' > + * > + * Copyright 2025, Red Hat, Inc. > + * > + * Author(s): David Hildenbrand <da...@redhat.com> > + */ > +#define _GNU_SOURCE > +#include <stdlib.h> > +#include <string.h> > +#include <stdint.h> > +#include <unistd.h> > +#include <errno.h> > +#include <fcntl.h> > +#include <signal.h> > +#include <setjmp.h> > +#include <linux/mman.h> > +#include <sys/mman.h> > +#include <sys/wait.h> > + > +#include "../kselftest_harness.h" > +#include "vm_util.h" > + > +static sigjmp_buf sigjmp_buf_env; > + > +static void signal_handler(int sig) > +{ > + siglongjmp(sigjmp_buf_env, -EFAULT); > +} > + > +static int test_read_access(char *addr, size_t size, size_t pagesize) > +{ > + size_t offs; > + int ret; > + > + if (signal(SIGSEGV, signal_handler) == SIG_ERR) > + return -EINVAL; > + > + ret = sigsetjmp(sigjmp_buf_env, 1); > + if (!ret) { > + for (offs = 0; offs < size; offs += pagesize) > + /* Force a read that the compiler cannot optimize out. > */ > + *((volatile char *)(addr + offs)); > + } > + if (signal(SIGSEGV, signal_handler) == SIG_ERR) > + return -EINVAL; > + > + return ret; > +} > + > +FIXTURE(pfnmap) > +{ > + size_t pagesize; > + int dev_mem_fd; > + char *addr1; > + size_t size1; > + char *addr2; > + size_t size2; > +}; > + > +FIXTURE_SETUP(pfnmap) > +{ > + self->pagesize = getpagesize(); > + > + self->dev_mem_fd = open("/dev/mem", O_RDONLY); > + if (self->dev_mem_fd < 0) > + SKIP(return, "Cannot open '/dev/mem'\n"); > + > + /* We'll require the first two pages throughout our tests ... */ > + self->size1 = self->pagesize * 2; > + self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED, > + self->dev_mem_fd, 0); > + if (self->addr1 == MAP_FAILED) > + SKIP(return, "Cannot mmap '/dev/mem'\n"); > + > + /* ... and want to be able to read from them. 
*/ > + if (test_read_access(self->addr1, self->size1, self->pagesize)) > + SKIP(return, "Cannot read-access mmap'ed '/dev/mem'\n"); > + > + self->size2 = 0; > + self->addr2 = MAP_FAILED; > +} > + > +FIXTURE_TEARDOWN(pfnmap) > +{ > + if (self->addr2 != MAP_FAILED) > + munmap(self->addr2, self->size2); > + if (self->addr1 != MAP_FAILED) > + munmap(self->addr1, self->size1); > + if (self->dev_mem_fd >= 0) > + close(self->dev_mem_fd); > +} > + > +TEST_F(pfnmap, madvise_disallowed) > +{ > + int advices[] = { > + MADV_DONTNEED, > + MADV_DONTNEED_LOCKED, > + MADV_FREE, > + MADV_WIPEONFORK, > + MADV_COLD, > + MADV_PAGEOUT, > + MADV_POPULATE_READ, > + MADV_POPULATE_WRITE, > + }; > + int i; > + > + /* All these advices must be rejected. */ > + for (i = 0; i < ARRAY_SIZE(advices); i++) { > + EXPECT_LT(madvise(self->addr1, self->pagesize, advices[i]), 0); > + EXPECT_EQ(errno, EINVAL); > + } > +} > + > +TEST_F(pfnmap, munmap_split) > +{ > + /* > + * Unmap the first page. This munmap() call is not really expected to > + * fail, but we might be able to trigger other internal issues. > + */ > + ASSERT_EQ(munmap(self->addr1, self->pagesize), 0); > + > + /* > + * Remap the first page while the second page is still mapped. This > + * makes sure that any PAT tracking on x86 will allow for mmap()'ing > + * a page again while some parts of the first mmap() are still > + * around. > + */ > + self->size2 = self->pagesize; > + self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED, > + self->dev_mem_fd, 0); > + ASSERT_NE(self->addr2, MAP_FAILED); > +} > + > +TEST_F(pfnmap, mremap_fixed) > +{ > + char *ret; > + > + /* Reserve a destination area. */ > + self->size2 = self->size1; > + self->addr2 = mmap(NULL, self->size2, PROT_READ, MAP_ANON | MAP_PRIVATE, > + -1, 0); > + ASSERT_NE(self->addr2, MAP_FAILED); > + > + /* mremap() over our destination. 
*/ > + ret = mremap(self->addr1, self->size1, self->size2, > + MREMAP_FIXED | MREMAP_MAYMOVE, self->addr2); > + ASSERT_NE(ret, MAP_FAILED); > +} > + > +TEST_F(pfnmap, mremap_shrink) > +{ > + char *ret; > + > + /* Shrinking is expected to work. */ > + ret = mremap(self->addr1, self->size1, self->size1 - self->pagesize, 0); > + ASSERT_NE(ret, MAP_FAILED); > +} > + > +TEST_F(pfnmap, mremap_expand) > +{ > + /* > + * Growing is not expected to work, and getting it right would > + * be challenging. So this test primarily serves as an early warning > + * that something that probably should never work suddenly works. > + */ > + self->size2 = self->size1 + self->pagesize; > + self->addr2 = mremap(self->addr1, self->size1, self->size2, > MREMAP_MAYMOVE); > + ASSERT_EQ(self->addr2, MAP_FAILED); > +} > + > +TEST_F(pfnmap, fork) > +{ > + pid_t pid; > + int ret; > + > + /* fork() a child and test if the child can access the pages. */ > + pid = fork(); > + ASSERT_GE(pid, 0); > + > + if (!pid) { > + EXPECT_EQ(test_read_access(self->addr1, self->size1, > + self->pagesize), 0); > + exit(0); > + } > + > + wait(&ret); > + if (WIFEXITED(ret)) > + ret = WEXITSTATUS(ret); > + else > + ret = -EINVAL; > + ASSERT_EQ(ret, 0); > +} > + > +TEST_HARNESS_MAIN > diff --git a/tools/testing/selftests/mm/run_vmtests.sh > b/tools/testing/selftests/mm/run_vmtests.sh > index 188b125bf1f6b..dddd1dd8af145 100755 > --- a/tools/testing/selftests/mm/run_vmtests.sh > +++ b/tools/testing/selftests/mm/run_vmtests.sh > @@ -63,6 +63,8 @@ separated by spaces: > test soft dirty page bit semantics > - pagemap > test pagemap_scan IOCTL > +- pfnmap > + tests for VM_PFNMAP handling > - cow > test copy-on-write semantics > - thp > @@ -472,6 +474,8 @@ fi > > CATEGORY="pagemap" run_test ./pagemap_ioctl > > +CATEGORY="pfnmap" run_test ./pfnmap > + > # COW tests > CATEGORY="cow" run_test ./cow > > -- > 2.49.0 >