David Matlack <[email protected]> writes: > Add a driver to VFIO selftests for Intel DSA devices. > > For now the driver only supports up to 32 batches and 1024 copies per > batch, which were the limits of the hardware this commit was tested > with. This is sufficient to generate 9+ minutes of DMA memcpys at a rate > of over 30 GB/s. This should be plenty to stress test VFIO and the IOMMU. > > The driver does not yet support requesting interrupt handles, as this > commit was not tested against hardware that requires it. >
Just some questions, for the code: Acked-by: Vinicius Costa Gomes <[email protected]> > Cc: Vinicius Costa Gomes <[email protected]> > Cc: Dave Jiang <[email protected]> > Signed-off-by: David Matlack <[email protected]> > --- > .../selftests/vfio/lib/drivers/dsa/dsa.c | 416 ++++++++++++++++++ > .../vfio/lib/drivers/dsa/registers.h | 1 + > tools/testing/selftests/vfio/lib/libvfio.mk | 1 + > .../selftests/vfio/lib/vfio_pci_driver.c | 2 + > 4 files changed, 420 insertions(+) > create mode 100644 tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c > create mode 120000 tools/testing/selftests/vfio/lib/drivers/dsa/registers.h > > diff --git a/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c > b/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c > new file mode 100644 > index 000000000000..0ca2cbc2a316 > --- /dev/null > +++ b/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c > @@ -0,0 +1,416 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +#include <stdint.h> > +#include <unistd.h> > + > +#include <linux/bits.h> > +#include <linux/errno.h> > +#include <linux/idxd.h> > +#include <linux/io.h> > +#include <linux/pci_ids.h> > +#include <linux/sizes.h> > + > +#include <vfio_util.h> > + > +#include "registers.h" > + > +/* Vectors 1+ are available for work queue completion interrupts. */ > +#define MSIX_VECTOR 1 > + > +struct dsa_state { > + /* Descriptors for copy and batch operations. */ > + struct dsa_hw_desc batch[32]; > + struct dsa_hw_desc copy[1024]; > + > + /* Completion records for copy and batch operations. 
*/ > + struct dsa_completion_record copy_completion; > + struct dsa_completion_record batch_completion; > + > + /* Cached device registers (and derived data) for easy access */ > + union gen_cap_reg gen_cap; > + union wq_cap_reg wq_cap; > + union group_cap_reg group_cap; > + union engine_cap_reg engine_cap; > + union offsets_reg table_offsets; > + void *wqcfg_table; > + void *grpcfg_table; > + u64 max_batches; > + u64 max_copies_per_batch; > + > + /* The number of ongoing memcpy operations. */ > + u64 memcpy_count; > + > + /* Buffers used by dsa_send_msi() to generate an interrupt */ > + u64 send_msi_src; > + u64 send_msi_dst; > +}; > + > +static inline struct dsa_state *to_dsa_state(struct vfio_pci_device *device) > +{ > + return device->driver.region.vaddr; > +} > + > +static bool dsa_int_handle_request_required(struct vfio_pci_device *device) > +{ > + void *bar0 = device->bars[0].vaddr; > + union gen_cap_reg gen_cap; > + u32 cmd_cap; > + > + gen_cap.bits = readq(bar0 + IDXD_GENCAP_OFFSET); > + if (!gen_cap.cmd_cap) > + return false; > + > + cmd_cap = readl(bar0 + IDXD_CMDCAP_OFFSET); > + return (cmd_cap >> IDXD_CMD_REQUEST_INT_HANDLE) & 1; > +} > + > +static int dsa_probe(struct vfio_pci_device *device) > +{ > + if (!vfio_pci_device_match(device, PCI_VENDOR_ID_INTEL, > + PCI_DEVICE_ID_INTEL_DSA_SPR0)) What do you think about adding support for multiple device IDs?
> + return -EINVAL; > + > + if (dsa_int_handle_request_required(device)) { > + printf("Device requires requesting interrupt handles\n"); > + return -EINVAL; > + } > + > + return 0; > +} > + > +static void dsa_check_sw_err(struct vfio_pci_device *device) > +{ > + void *reg = device->bars[0].vaddr + IDXD_SWERR_OFFSET; > + union sw_err_reg err = {}; > + int i; > + > + for (i = 0; i < ARRAY_SIZE(err.bits); i++) { > + err.bits[i] = readq(reg + offsetof(union sw_err_reg, bits[i])); > + > + /* No errors */ > + if (i == 0 && !err.valid) > + return; > + } > + > + fprintf(stderr, "SWERR: 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n", > + err.bits[0], err.bits[1], err.bits[2], err.bits[3]); > + > + fprintf(stderr, " valid: 0x%x\n", err.valid); > + fprintf(stderr, " overflow: 0x%x\n", err.overflow); > + fprintf(stderr, " desc_valid: 0x%x\n", err.desc_valid); > + fprintf(stderr, " wq_idx_valid: 0x%x\n", err.wq_idx_valid); > + fprintf(stderr, " batch: 0x%x\n", err.batch); > + fprintf(stderr, " fault_rw: 0x%x\n", err.fault_rw); > + fprintf(stderr, " priv: 0x%x\n", err.priv); > + fprintf(stderr, " error: 0x%x\n", err.error); > + fprintf(stderr, " wq_idx: 0x%x\n", err.wq_idx); > + fprintf(stderr, " operation: 0x%x\n", err.operation); > + fprintf(stderr, " pasid: 0x%x\n", err.pasid); > + fprintf(stderr, " batch_idx: 0x%x\n", err.batch_idx); > + fprintf(stderr, " invalid_flags: 0x%x\n", err.invalid_flags); > + fprintf(stderr, " fault_addr: 0x%lx\n", err.fault_addr); > + > + VFIO_FAIL("Software Error Detected!\n"); > +} > + > +static void dsa_command(struct vfio_pci_device *device, u32 cmd) > +{ > + union idxd_command_reg cmd_reg = { .cmd = cmd }; > + u32 sleep_ms = 1, attempts = 5000 / sleep_ms; > + void *bar0 = device->bars[0].vaddr; > + u32 status; > + u8 err; > + > + writel(cmd_reg.bits, bar0 + IDXD_CMD_OFFSET); > + > + for (;;) { > + dsa_check_sw_err(device); > + > + status = readl(bar0 + IDXD_CMDSTS_OFFSET); > + if (!(status & IDXD_CMDSTS_ACTIVE)) > + break; > + > + 
VFIO_ASSERT_GT(--attempts, 0); > + usleep(sleep_ms * 1000); > + } > + > + err = status & IDXD_CMDSTS_ERR_MASK; > + VFIO_ASSERT_EQ(err, 0, "Error issuing command 0x%x: 0x%x\n", cmd, err); > +} > + > +static void dsa_wq_init(struct vfio_pci_device *device) > +{ > + struct dsa_state *dsa = to_dsa_state(device); > + union wq_cap_reg wq_cap = dsa->wq_cap; > + union wqcfg wqcfg; > + u64 wqcfg_size; > + int i; > + > + VFIO_ASSERT_GT((u32)wq_cap.num_wqs, 0); > + > + wqcfg = (union wqcfg) { > + .wq_size = wq_cap.total_wq_size, > + .mode = 1, > + .priority = 1, > + /* > + * Disable Address Translation Service (if enabled) so that VFIO > + * selftests using this driver can generate I/O page faults. > + */ > + .wq_ats_disable = wq_cap.wq_ats_support, > + .max_xfer_shift = dsa->gen_cap.max_xfer_shift, > + .max_batch_shift = dsa->gen_cap.max_batch_shift, > + .op_config[0] = BIT(DSA_OPCODE_MEMMOVE) | BIT(DSA_OPCODE_BATCH), > + }; > + > + wqcfg_size = 1UL << (wq_cap.wqcfg_size + IDXD_WQCFG_MIN); > + > + for (i = 0; i < wqcfg_size / sizeof(wqcfg.bits[0]); i++) > + writel(wqcfg.bits[i], dsa->wqcfg_table + offsetof(union wqcfg, > bits[i])); > +} > + > +static void dsa_group_init(struct vfio_pci_device *device) > +{ > + struct dsa_state *dsa = to_dsa_state(device); > + union group_cap_reg group_cap = dsa->group_cap; > + union engine_cap_reg engine_cap = dsa->engine_cap; > + > + VFIO_ASSERT_GT((u32)group_cap.num_groups, 0); > + VFIO_ASSERT_GT((u32)engine_cap.num_engines, 0); > + > + /* Assign work queue 0 and engine 0 to group 0 */ > + writeq(1, dsa->grpcfg_table + offsetof(struct grpcfg, wqs[0])); > + writeq(1, dsa->grpcfg_table + offsetof(struct grpcfg, engines)); > +} > + > +static void dsa_register_cache_init(struct vfio_pci_device *device) > +{ > + struct dsa_state *dsa = to_dsa_state(device); > + void *bar0 = device->bars[0].vaddr; > + > + dsa->gen_cap.bits = readq(bar0 + IDXD_GENCAP_OFFSET); > + dsa->wq_cap.bits = readq(bar0 + IDXD_WQCAP_OFFSET); > + dsa->group_cap.bits = 
readq(bar0 + IDXD_GRPCAP_OFFSET); > + dsa->engine_cap.bits = readq(bar0 + IDXD_ENGCAP_OFFSET); > + > + dsa->table_offsets.bits[0] = readq(bar0 + IDXD_TABLE_OFFSET); > + dsa->table_offsets.bits[1] = readq(bar0 + IDXD_TABLE_OFFSET + 8); > + > + dsa->wqcfg_table = bar0 + dsa->table_offsets.wqcfg * IDXD_TABLE_MULT; > + dsa->grpcfg_table = bar0 + dsa->table_offsets.grpcfg * IDXD_TABLE_MULT; > + > + dsa->max_batches = 1U << (dsa->wq_cap.total_wq_size + IDXD_WQCFG_MIN); > + dsa->max_batches = min(dsa->max_batches, ARRAY_SIZE(dsa->batch)); > + > + dsa->max_copies_per_batch = 1UL << dsa->gen_cap.max_batch_shift; > + dsa->max_copies_per_batch = min(dsa->max_copies_per_batch, > ARRAY_SIZE(dsa->copy)); > +} > + > +static void dsa_init(struct vfio_pci_device *device) > +{ > + struct dsa_state *dsa = to_dsa_state(device); > + > + VFIO_ASSERT_GE(device->driver.region.size, sizeof(*dsa)); > + > + vfio_pci_config_writew(device, PCI_COMMAND, > + PCI_COMMAND_MEMORY | > + PCI_COMMAND_MASTER | > + PCI_COMMAND_INTX_DISABLE); > + > + dsa_command(device, IDXD_CMD_RESET_DEVICE); > + > + dsa_register_cache_init(device); > + dsa_wq_init(device); > + dsa_group_init(device); > + > + dsa_command(device, IDXD_CMD_ENABLE_DEVICE); > + dsa_command(device, IDXD_CMD_ENABLE_WQ); > + > + vfio_pci_msix_enable(device, MSIX_VECTOR, 1); > + > + device->driver.max_memcpy_count = > + dsa->max_batches * dsa->max_copies_per_batch; > + device->driver.max_memcpy_size = 1UL << dsa->gen_cap.max_xfer_shift; > + device->driver.msi = MSIX_VECTOR; > +} > + > +static void dsa_remove(struct vfio_pci_device *device) > +{ > + dsa_command(device, IDXD_CMD_RESET_DEVICE); > + vfio_pci_msix_disable(device); > +} > + > +static int dsa_completion_wait(struct vfio_pci_device *device, > + struct dsa_completion_record *completion) > +{ > + u8 status; > + > + for (;;) { > + dsa_check_sw_err(device); > + > + status = READ_ONCE(completion->status); > + if (status) > + break; > + > + usleep(1000); Another minor thing to think about:
using umonitor/umwait. > + } > + > + if (status == DSA_COMP_SUCCESS) > + return 0; > + > + printf("Error detected during memcpy operation: 0x%x\n", status); > + return -1; > +} > + > +static void dsa_copy_desc_init(struct vfio_pci_device *device, > + struct dsa_hw_desc *desc, > + iova_t src, iova_t dst, u64 size, > + bool interrupt) > +{ > + struct dsa_state *dsa = to_dsa_state(device); > + u16 flags; > + > + flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR; > + > + if (interrupt) > + flags |= IDXD_OP_FLAG_RCI; > + > + *desc = (struct dsa_hw_desc) { > + .opcode = DSA_OPCODE_MEMMOVE, > + .flags = flags, > + .priv = 1, > + .src_addr = src, > + .dst_addr = dst, > + .xfer_size = size, > + .completion_addr = to_iova(device, &dsa->copy_completion), > + .int_handle = interrupt ? MSIX_VECTOR : 0, > + }; > +} > + > +static void dsa_batch_desc_init(struct vfio_pci_device *device, > + struct dsa_hw_desc *desc, > + u64 count) > +{ > + struct dsa_state *dsa = to_dsa_state(device); > + > + *desc = (struct dsa_hw_desc) { > + .opcode = DSA_OPCODE_BATCH, > + .flags = IDXD_OP_FLAG_CRAV, > + .priv = 1, > + .completion_addr = to_iova(device, &dsa->batch_completion), > + .desc_list_addr = to_iova(device, &dsa->copy[0]), > + .desc_count = count, > + }; > +} > + > +static void dsa_desc_write(struct vfio_pci_device *device, struct > dsa_hw_desc *desc) > +{ > + /* Write the contents (not address) of the 64-byte descriptor to the > device. 
*/ > + iosubmit_cmds512(device->bars[2].vaddr, desc, 1); > +} > + > +static void dsa_memcpy_one(struct vfio_pci_device *device, > + iova_t src, iova_t dst, u64 size, bool interrupt) > +{ > + struct dsa_state *dsa = to_dsa_state(device); > + > + memset(&dsa->copy_completion, 0, sizeof(dsa->copy_completion)); > + > + dsa_copy_desc_init(device, &dsa->copy[0], src, dst, size, interrupt); > + dsa_desc_write(device, &dsa->copy[0]); > +} > + > +static void dsa_memcpy_batch(struct vfio_pci_device *device, > + iova_t src, iova_t dst, u64 size, u64 count) > +{ > + struct dsa_state *dsa = to_dsa_state(device); > + int i; > + > + memset(&dsa->batch_completion, 0, sizeof(dsa->batch_completion)); > + > + for (i = 0; i < ARRAY_SIZE(dsa->copy); i++) { > + struct dsa_hw_desc *copy_desc = &dsa->copy[i]; > + > + dsa_copy_desc_init(device, copy_desc, src, dst, size, false); > + > + /* Don't request completions for individual copies. */ > + copy_desc->flags &= ~IDXD_OP_FLAG_RCR; > + } > + > + for (i = 0; i < ARRAY_SIZE(dsa->batch) && count; i++) { > + struct dsa_hw_desc *batch_desc = &dsa->batch[i]; > + int nr_copies; > + > + nr_copies = min(count, dsa->max_copies_per_batch); > + count -= nr_copies; > + > + /* > + * Batches must have at least 2 copies, so handle the case where > + * there is exactly 1 copy left by doing one less copy in this > + * batch and then 2 in the next. > + */ > + if (count == 1) { > + nr_copies--; > + count++; > + } > + > + dsa_batch_desc_init(device, batch_desc, nr_copies); > + > + /* Request a completion for the last batch. */ > + if (!count) > + batch_desc->flags |= IDXD_OP_FLAG_RCR; > + > + dsa_desc_write(device, batch_desc); > + } > + > + VFIO_ASSERT_EQ(count, 0, "Failed to start %lu copies.\n", count); > +} > + > +static void dsa_memcpy_start(struct vfio_pci_device *device, > + iova_t src, iova_t dst, u64 size, u64 count) > +{ > + struct dsa_state *dsa = to_dsa_state(device); > + > + /* DSA devices require at least 2 copies per batch. 
*/ > + if (count == 1) > + dsa_memcpy_one(device, src, dst, size, false); > + else > + dsa_memcpy_batch(device, src, dst, size, count); > + > + dsa->memcpy_count = count; > +} > + > +static int dsa_memcpy_wait(struct vfio_pci_device *device) > +{ > + struct dsa_state *dsa = to_dsa_state(device); > + int r; > + > + if (dsa->memcpy_count == 1) > + r = dsa_completion_wait(device, &dsa->copy_completion); > + else > + r = dsa_completion_wait(device, &dsa->batch_completion); > + > + dsa->memcpy_count = 0; > + > + return r; > +} > + > +static void dsa_send_msi(struct vfio_pci_device *device) > +{ > + struct dsa_state *dsa = to_dsa_state(device); > + > + dsa_memcpy_one(device, > + to_iova(device, &dsa->send_msi_src), > + to_iova(device, &dsa->send_msi_dst), > + sizeof(dsa->send_msi_src), true); > + > + VFIO_ASSERT_EQ(dsa_completion_wait(device, &dsa->copy_completion), 0); > +} > + > +const struct vfio_pci_driver_ops dsa_ops = { > + .name = "dsa", > + .probe = dsa_probe, > + .init = dsa_init, > + .remove = dsa_remove, > + .memcpy_start = dsa_memcpy_start, > + .memcpy_wait = dsa_memcpy_wait, > + .send_msi = dsa_send_msi, > +}; > diff --git a/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h > b/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h > new file mode 120000 > index 000000000000..bde657c3c2af > --- /dev/null > +++ b/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h > @@ -0,0 +1 @@ > +../../../../../../../drivers/dma/idxd/registers.h > \ No newline at end of file > diff --git a/tools/testing/selftests/vfio/lib/libvfio.mk > b/tools/testing/selftests/vfio/lib/libvfio.mk > index 624dc267a879..5d11c3a89a28 100644 > --- a/tools/testing/selftests/vfio/lib/libvfio.mk > +++ b/tools/testing/selftests/vfio/lib/libvfio.mk > @@ -8,6 +8,7 @@ LIBVFIO_C += lib/vfio_pci_driver.c > > ifeq ($(ARCH:x86_64=x86),x86) > LIBVFIO_C += lib/drivers/ioat/ioat.c > +LIBVFIO_C += lib/drivers/dsa/dsa.c > endif > > LIBVFIO_O := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBVFIO_C)) > diff 
--git a/tools/testing/selftests/vfio/lib/vfio_pci_driver.c > b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c > index aa47360e47a9..e5e8723ecb41 100644 > --- a/tools/testing/selftests/vfio/lib/vfio_pci_driver.c > +++ b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c > @@ -5,11 +5,13 @@ > #include <vfio_util.h> > > #ifdef __x86_64__ > +extern struct vfio_pci_driver_ops dsa_ops; > extern struct vfio_pci_driver_ops ioat_ops; > #endif > > static struct vfio_pci_driver_ops *driver_ops[] = { > #ifdef __x86_64__ > + &dsa_ops, > &ioat_ops, > #endif > }; > -- > 2.50.0.rc2.701.gf1e915cc24-goog > -- Vinicius
