From: Manish Honap <[email protected]>

Exercise the user-visible contract added by CONFIG_VFIO_PCI_CXL:

  device_is_cxl                 GET_INFO returns VFIO_DEVICE_FLAGS_CXL
                                and a populated VFIO_DEVICE_INFO_CAP_CXL.

  hdm_region_mmap_rw            mmap() one page of the HDM region,
                                write a pattern, read it back.  Proves
                                the mmap fault handler's vmf_insert_pfn
                                path and the firmware-committed HPA
                                mapping.

  component_bar_sparse_mmap     GET_REGION_INFO on the component BAR
                                advertises a SPARSE_MMAP cap, and every
                                advertised mmappable area lies outside
                                [comp_reg_offset, +comp_reg_size).

  comp_regs_cm_cap_array_read   pread() of the COMP_REGS region at
                                CXL_CM_OFFSET returns a valid CM
                                cap-array header (CAP_ID == 1,
                                ARRAY_SIZE > 0).  Proves the
                                cxl_passthrough_cm_rw() dispatch is
                                wired.

  dvsec_lock_byte_read          pread() of the DVSEC CONFIG_LOCK byte
                                through the config-rw clipping shim
                                succeeds.  Proves the
                                cxl_passthrough_dvsec_rw() path is
                                wired.

COMMIT/COMMITTED state-machine and DVSEC LOCK latch behaviour are
out of scope for this smoke test.  No debugfs dependency.

Signed-off-by: Manish Honap <[email protected]>
---
 tools/testing/selftests/vfio/Makefile         |   1 +
 .../selftests/vfio/lib/vfio_pci_device.c      |  11 +-
 .../selftests/vfio/vfio_cxl_type2_test.c      | 350 ++++++++++++++++++
 3 files changed, 361 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/vfio/vfio_cxl_type2_test.c

diff --git a/tools/testing/selftests/vfio/Makefile 
b/tools/testing/selftests/vfio/Makefile
index 0684932d91bf..25f2a9420ef6 100644
--- a/tools/testing/selftests/vfio/Makefile
+++ b/tools/testing/selftests/vfio/Makefile
@@ -12,6 +12,7 @@ TEST_GEN_PROGS += vfio_iommufd_setup_test
 TEST_GEN_PROGS += vfio_pci_device_test
 TEST_GEN_PROGS += vfio_pci_device_init_perf_test
 TEST_GEN_PROGS += vfio_pci_driver_test
+TEST_GEN_PROGS += vfio_cxl_type2_test
 
 TEST_FILES += scripts/cleanup.sh
 TEST_FILES += scripts/lib.sh
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c 
b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
index fc75e04ef010..d2150129d854 100644
--- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -281,7 +281,16 @@ static void vfio_pci_device_setup(struct vfio_pci_device 
*device)
                struct vfio_pci_bar *bar = device->bars + i;
 
                vfio_pci_region_get(device, i, &bar->info);
-               if (bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP)
+               /*
+                * Skip auto-mmap when the BAR advertises region-info caps
+                * (e.g. VFIO_REGION_INFO_CAP_SPARSE_MMAP).  Such BARs are
+                * only partially mmappable; the kernel rejects full-BAR
+                * mmaps and the caller must walk the sparse-area cap and
+                * mmap each advertised area separately.  Tests that need
+                * access to such a BAR handle the per-area mmap themselves.
+                */
+               if ((bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP) &&
+                   !(bar->info.flags & VFIO_REGION_INFO_FLAG_CAPS))
                        vfio_pci_bar_map(device, i);
        }
 
diff --git a/tools/testing/selftests/vfio/vfio_cxl_type2_test.c 
b/tools/testing/selftests/vfio/vfio_cxl_type2_test.c
new file mode 100644
index 000000000000..bc98a29f90ad
--- /dev/null
+++ b/tools/testing/selftests/vfio/vfio_cxl_type2_test.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vfio_cxl_type2_test - smoke + dispatch tests for CXL Type-2 device
+ * passthrough through vfio-pci.
+ *
+ * Exercises the user-visible surface gated by CONFIG_VFIO_PCI_CXL:
+ *  - GET_INFO returns VFIO_DEVICE_FLAGS_CXL + a populated CAP_CXL.
+ *  - The HDM-backed VFIO region can be mmap'd and read/written.
+ *  - The component BAR exposes a SPARSE_MMAP cap that excludes the
+ *    CXL component register sub-range.
+ *  - The COMP_REGS region serves CM cap-array dwords from cxl-core's
+ *    snapshot (proves the cxl_passthrough_cm_rw() path is wired).
+ *  - DVSEC body reads through the config-rw clipping shim return the
+ *    cxl-core shadow (proves cxl_passthrough_dvsec_rw() is wired).
+ *
+ * Usage:
+ *   ./vfio_cxl_type2_test <BDF>
+ * or export VFIO_SELFTESTS_BDF=<BDF> before running.  The device must
+ * be bound to vfio-pci and the kernel must have CONFIG_VFIO_PCI_CXL=y.
+ *
+ * Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES.
+ */
+
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/pci_regs.h>
+#include <linux/sizes.h>
+#include <linux/vfio.h>
+
+#include <cxl/cxl_regs.h>
+
+#include <libvfio.h>
+
+#include "kselftest_harness.h"
+
+#define PCI_DVSEC_VENDOR_ID_CXL                0x1e98
+#define PCI_DVSEC_ID_CXL_DEVICE                0x0000
+
+/*
+ * vfio-pci's region offset packing (kernel-internal in
+ * include/linux/vfio_pci_core.h, not exposed via UAPI as of writing).
+ * Provide local definitions so the selftest builds against the bare
+ * UAPI vfio.h.  The guards let a future kernel hoist these to UAPI
+ * without breaking this test.
+ */
+#ifndef VFIO_PCI_OFFSET_SHIFT
+#define VFIO_PCI_OFFSET_SHIFT          40
+#endif
+#ifndef VFIO_PCI_INDEX_TO_OFFSET
+#define VFIO_PCI_INDEX_TO_OFFSET(index)        ((uint64_t)(index) << 
VFIO_PCI_OFFSET_SHIFT)
+#endif
+
+static const char *device_bdf;
+
+/* Find a struct vfio_device_info capability by id in a GET_INFO buffer. */
+static const struct vfio_info_cap_header *
+find_device_cap(const void *buf, size_t bufsz, uint16_t id)
+{
+       const struct vfio_device_info *info = buf;
+       const struct vfio_info_cap_header *cap;
+       size_t off = info->cap_offset;
+
+       while (off && off < bufsz) {
+               cap = (const void *)((const char *)buf + off);
+               if (cap->id == id)
+                       return cap;
+               off = cap->next;
+       }
+       return NULL;
+}
+
+/* Walk PCI extended capability list for the CXL Device DVSEC. */
+static uint16_t find_cxl_dvsec(struct vfio_pci_device *dev)
+{
+       uint16_t pos = PCI_CFG_SPACE_SIZE;
+       int iter = 0;
+
+       while (pos && iter++ < 64) {
+               uint32_t hdr = vfio_pci_config_readl(dev, pos);
+               uint16_t cap_id = hdr & 0xffff;
+               uint16_t next   = (hdr >> 20) & 0xffc;
+               uint32_t hdr1, hdr2;
+
+               if (cap_id == PCI_EXT_CAP_ID_DVSEC) {
+                       hdr1 = vfio_pci_config_readl(dev, pos + 4);
+                       hdr2 = vfio_pci_config_readl(dev, pos + 8);
+                       if ((hdr1 & 0xffff) == PCI_DVSEC_VENDOR_ID_CXL &&
+                           (hdr2 & 0xffff) == PCI_DVSEC_ID_CXL_DEVICE)
+                               return pos;
+               }
+               pos = next;
+       }
+       return 0;
+}
+
+FIXTURE(cxl_type2) {
+       struct iommu *iommu;
+       struct vfio_pci_device *dev;
+
+       struct vfio_device_info_cap_cxl cxl_cap;
+       uint16_t dvsec_base;
+
+       uint64_t hdm_region_size;
+       uint64_t comp_regs_size;
+};
+
+FIXTURE_SETUP(cxl_type2)
+{
+       uint8_t infobuf[512] = {};
+       struct vfio_device_info *info = (void *)infobuf;
+       const struct vfio_device_info_cap_cxl *cap;
+       struct vfio_region_info ri = { .argsz = sizeof(ri) };
+
+       self->iommu = iommu_init(default_iommu_mode);
+       self->dev   = vfio_pci_device_init(device_bdf, self->iommu);
+
+       info->argsz = sizeof(infobuf);
+       ASSERT_EQ(0, ioctl(self->dev->fd, VFIO_DEVICE_GET_INFO, info));
+
+       if (!(info->flags & VFIO_DEVICE_FLAGS_CXL))
+               SKIP(return, "not a CXL Type-2 device");
+
+       cap = (const void *)find_device_cap(infobuf, sizeof(infobuf),
+                                           VFIO_DEVICE_INFO_CAP_CXL);
+       ASSERT_NE(NULL, cap);
+       memcpy(&self->cxl_cap, cap, sizeof(*cap));
+
+       ri.index = self->cxl_cap.hdm_region_idx;
+       ASSERT_EQ(0, ioctl(self->dev->fd, VFIO_DEVICE_GET_REGION_INFO, &ri));
+       self->hdm_region_size = ri.size;
+
+       ri.argsz = sizeof(ri);
+       ri.index = self->cxl_cap.comp_reg_region_idx;
+       ASSERT_EQ(0, ioctl(self->dev->fd, VFIO_DEVICE_GET_REGION_INFO, &ri));
+       self->comp_regs_size = ri.size;
+
+       self->dvsec_base = find_cxl_dvsec(self->dev);
+}
+
+FIXTURE_TEARDOWN(cxl_type2)
+{
+       vfio_pci_device_cleanup(self->dev);
+       iommu_cleanup(self->iommu);
+}
+
+TEST_F(cxl_type2, device_is_cxl)
+{
+       const struct vfio_device_info_cap_cxl *c = &self->cxl_cap;
+
+       ASSERT_EQ(VFIO_DEVICE_INFO_CAP_CXL, c->header.id);
+       ASSERT_EQ(1, c->header.version);
+       ASSERT_NE(c->hdm_region_idx, c->comp_reg_region_idx);
+       ASSERT_GE(c->hdm_region_idx,    VFIO_PCI_NUM_REGIONS);
+       ASSERT_GE(c->comp_reg_region_idx, VFIO_PCI_NUM_REGIONS);
+       ASSERT_LT(c->comp_reg_bar, PCI_STD_NUM_BARS);
+       ASSERT_GT(c->comp_reg_size, 0ULL);
+       ASSERT_EQ(c->comp_reg_size, self->comp_regs_size);
+}
+
+TEST_F(cxl_type2, hdm_region_mmap_rw)
+{
+       uint64_t off = (uint64_t)VFIO_PCI_INDEX_TO_OFFSET(
+               self->cxl_cap.hdm_region_idx);
+       uint32_t pattern = 0xdeadbeefU;
+       uint32_t readback = 0;
+       void *map;
+
+       if (self->hdm_region_size < SZ_4K)
+               SKIP(return, "HDM region < 4K");
+
+       map = mmap(NULL, SZ_4K, PROT_READ | PROT_WRITE, MAP_SHARED,
+                  self->dev->fd, off);
+       ASSERT_NE(MAP_FAILED, map);
+
+       *(volatile uint32_t *)map = pattern;
+       readback = *(volatile uint32_t *)map;
+       ASSERT_EQ(pattern, readback);
+
+       ASSERT_EQ(0, munmap(map, SZ_4K));
+}
+
+TEST_F(cxl_type2, component_bar_sparse_mmap)
+{
+       const uint8_t bar = self->cxl_cap.comp_reg_bar;
+       uint8_t buf[512] = {};
+       struct vfio_region_info *ri = (void *)buf;
+       const struct vfio_region_info_cap_sparse_mmap *sp;
+       const struct vfio_info_cap_header *hdr;
+       size_t off;
+       uint32_t i;
+
+       ri->argsz = sizeof(buf);
+       ri->index = bar;
+       ASSERT_EQ(0, ioctl(self->dev->fd, VFIO_DEVICE_GET_REGION_INFO, ri));
+
+       ASSERT_TRUE(ri->flags & VFIO_REGION_INFO_FLAG_CAPS);
+       off = ri->cap_offset;
+       hdr = NULL;
+       while (off && off < sizeof(buf)) {
+               hdr = (const void *)(buf + off);
+               if (hdr->id == VFIO_REGION_INFO_CAP_SPARSE_MMAP)
+                       break;
+               off = hdr->next;
+               hdr = NULL;
+       }
+       ASSERT_NE(NULL, hdr);
+       sp = (const void *)hdr;
+       ASSERT_GE(sp->nr_areas, 1U);
+       for (i = 0; i < sp->nr_areas; i++) {
+               uint64_t a_start = sp->areas[i].offset;
+               uint64_t a_end   = a_start + sp->areas[i].size;
+
+               ASSERT_TRUE(a_end <= self->cxl_cap.comp_reg_offset ||
+                           a_start >= self->cxl_cap.comp_reg_offset +
+                                      self->cxl_cap.comp_reg_size);
+       }
+}
+
+TEST_F(cxl_type2, comp_regs_cm_cap_array_read)
+{
+       uint64_t off = (uint64_t)VFIO_PCI_INDEX_TO_OFFSET(
+               self->cxl_cap.comp_reg_region_idx) + CXL_CM_OFFSET;
+       uint32_t hdr = 0;
+       uint16_t cap_id;
+       uint8_t  array_size;
+
+       ASSERT_EQ((ssize_t)sizeof(hdr),
+                 pread(self->dev->fd, &hdr, sizeof(hdr), off));
+
+       cap_id     = hdr & CXL_CM_CAP_HDR_ID_MASK;
+       array_size = (hdr & CXL_CM_CAP_HDR_ARRAY_SIZE_MASK) >> 24;
+       ASSERT_EQ(cap_id, CM_CAP_HDR_CAP_ID);
+       ASSERT_GT(array_size, 0);
+}
+
+TEST_F(cxl_type2, dvsec_lock_byte_read)
+{
+       uint8_t v;
+
+       if (!self->dvsec_base)
+               SKIP(return, "CXL Device DVSEC not found");
+
+       v = vfio_pci_config_readb(self->dev,
+                                 self->dvsec_base + 0x14);     /* CONFIG_LOCK 
*/
+       /* Snapshot value is host-firmware-dependent; just assert read
+        * succeeds (no SIGBUS, no -EIO).
+        */
+       (void)v;
+}
+
+/*
+ * Exercise the per-decoder COMMIT/COMMITTED state machine in
+ * cxl_passthrough_hdm_rw() (cxl-core).  Steps:
+ *
+ *   - Walk the CM cap-array via COMP_REGS reads to locate the HDM block.
+ *   - Read decoder 0 CTRL; for a firmware-committed Type-2 device both
+ *     COMMIT (bit 9) and COMMITTED (bit 10) are expected to be set.
+ *   - Release COMMIT by writing CTRL with bit 9 cleared.
+ *     Expected FSM transition: COMMITTED -> 0, LOCK_ON_COMMIT (bit 8) -> 0.
+ *   - Re-set COMMIT.  Expected: COMMITTED -> 1 (auto-set by the handler).
+ *   - Restore the original CTRL value so subsequent test runs see the
+ *     firmware-committed state.
+ *
+ * The CTRL writes touch the cxl-core shadow only — they do not reach
+ * the device — so the operation is safe to run repeatedly.
+ */
+TEST_F(cxl_type2, hdm_decoder_commit_fsm)
+{
+       uint64_t comp_off = (uint64_t)VFIO_PCI_INDEX_TO_OFFSET(
+               self->cxl_cap.comp_reg_region_idx);
+       uint32_t cm_hdr = 0, entry = 0;
+       uint64_t hdm_reg_offset = 0;
+       uint64_t ctrl_off;
+       uint32_t ctrl_orig, ctrl_test;
+       uint32_t array_size;
+       uint32_t i;
+
+       /* Discover HDM block offset via CM cap-array walk. */
+       ASSERT_EQ((ssize_t)sizeof(cm_hdr),
+                 pread(self->dev->fd, &cm_hdr, sizeof(cm_hdr),
+                       comp_off + CXL_CM_OFFSET));
+       ASSERT_EQ(CM_CAP_HDR_CAP_ID, cm_hdr & CXL_CM_CAP_HDR_ID_MASK);
+       array_size = (cm_hdr & CXL_CM_CAP_HDR_ARRAY_SIZE_MASK) >> 24;
+       ASSERT_GT(array_size, 0);
+
+       for (i = 1; i <= array_size; i++) {
+               ASSERT_EQ((ssize_t)sizeof(entry),
+                         pread(self->dev->fd, &entry, sizeof(entry),
+                               comp_off + CXL_CM_OFFSET + i * 4));
+               if ((entry & CXL_CM_CAP_HDR_ID_MASK) == CXL_CM_CAP_CAP_ID_HDM) {
+                       hdm_reg_offset = CXL_CM_OFFSET +
+                                        ((entry & CXL_CM_CAP_PTR_MASK) >> 20);
+                       break;
+               }
+       }
+       ASSERT_NE(0, hdm_reg_offset);
+
+       /* Read decoder 0 CTRL. */
+       ctrl_off = comp_off + hdm_reg_offset +
+                  CXL_HDM_DECODER0_CTRL_OFFSET(0);
+       ASSERT_EQ((ssize_t)sizeof(ctrl_orig),
+                 pread(self->dev->fd, &ctrl_orig, sizeof(ctrl_orig),
+                       ctrl_off));
+
+       /* Firmware-committed Type-2 device: COMMIT + COMMITTED both set. */
+       ASSERT_TRUE(ctrl_orig & BIT(9));        /* COMMIT */
+       ASSERT_TRUE(ctrl_orig & BIT(10));       /* COMMITTED */
+
+       /* Release COMMIT; FSM clears COMMITTED and LOCK_ON_COMMIT. */
+       ctrl_test = ctrl_orig & ~BIT(9);
+       ASSERT_EQ((ssize_t)sizeof(ctrl_test),
+                 pwrite(self->dev->fd, &ctrl_test, sizeof(ctrl_test),
+                        ctrl_off));
+       ASSERT_EQ((ssize_t)sizeof(ctrl_test),
+                 pread(self->dev->fd, &ctrl_test, sizeof(ctrl_test),
+                       ctrl_off));
+       ASSERT_FALSE(ctrl_test & BIT(9));       /* COMMIT cleared */
+       ASSERT_FALSE(ctrl_test & BIT(10));      /* COMMITTED auto-cleared */
+       ASSERT_FALSE(ctrl_test & BIT(8));       /* LOCK_ON_COMMIT auto-cleared 
*/
+
+       /* Re-set COMMIT; FSM auto-sets COMMITTED. */
+       ctrl_test = BIT(9);
+       ASSERT_EQ((ssize_t)sizeof(ctrl_test),
+                 pwrite(self->dev->fd, &ctrl_test, sizeof(ctrl_test),
+                        ctrl_off));
+       ASSERT_EQ((ssize_t)sizeof(ctrl_test),
+                 pread(self->dev->fd, &ctrl_test, sizeof(ctrl_test),
+                       ctrl_off));
+       ASSERT_TRUE(ctrl_test & BIT(9));        /* COMMIT */
+       ASSERT_TRUE(ctrl_test & BIT(10));       /* COMMITTED auto-set */
+
+       /* Restore the original CTRL value. */
+       ASSERT_EQ((ssize_t)sizeof(ctrl_orig),
+                 pwrite(self->dev->fd, &ctrl_orig, sizeof(ctrl_orig),
+                        ctrl_off));
+}
+
+int main(int argc, char *argv[])
+{
+       device_bdf = vfio_selftests_get_bdf(&argc, argv);
+       return test_harness_run(argc, argv);
+}
-- 
2.25.1


Reply via email to