On Sun, 2025-03-16 at 00:19 +0800, Tiwei Bie wrote:
> Implement a new virtual PCI driver based on the VFIO framework.
> This driver allows users to pass through PCI devices to UML via
> VFIO. Currently, only MSI-X capable devices are supported, and
> it is assumed that drivers will use MSI-X.

Seems nice, but I haven't tested it now :)

> +static irqreturn_t uml_vfio_interrupt(int unused, void *opaque)
> +{
> +     struct uml_vfio_intr_ctx *ctx = opaque;
> +     struct uml_vfio_device *dev = ctx->dev;
> +     int index = ctx - dev->intr_ctx;
> +     int irqfd = dev->udev.irqfd[index];
> +     int irq = dev->msix_data[index];
> +     uint64_t v;
> +     int r;
> +
> +     do {
> +             r = os_read_file(irqfd, &v, sizeof(v));
> +             if (r == sizeof(v))
> +                     generic_handle_irq(irq);
> +     } while (r == sizeof(v) || r == -EINTR);
> +     WARN(r != -EAGAIN, "read returned %d\n", r);
> +
> +     return IRQ_HANDLED;
> +}

This seems mostly right - looks like it's an eventfd.

> +     irqfd = uml_vfio_user_activate_irq(&dev->udev, index);
> +     if (irqfd < 0)
> +             return irqfd;
> +
> +     ctx->irq = um_request_irq(UM_IRQ_ALLOC, irqfd, IRQ_READ,
> +                               uml_vfio_interrupt, IRQF_SHARED,
> +                               "vfio-uml", ctx);

However I'm not sure it's right with IRQF_SHARED since you don't detect
if it was actually not your interrupt that fired? We don't really have
any good reason to have shared interrupts anyway though, and I'm not
sure we even *can* share interrupts in the lower layers in ARCH=um, so
I'm not sure it matters?

> +     err = add_sigio_fd(irqfd);
> +     if (err)
> +             goto free_irq;

doesn't the irq framework do that automatically?

> +static int uml_vfio_deactivate_irq(struct uml_vfio_device *dev, int index)
> +{
> +     struct uml_vfio_intr_ctx *ctx = &dev->intr_ctx[index];
> +
> +     if (ctx->irq >= 0) {
> +             ignore_sigio_fd(dev->udev.irqfd[index]);
> +             um_free_irq(ctx->irq, ctx);

and here too?

> +static int uml_vfio_update_msix_cap(struct uml_vfio_device *dev,
> +                                 unsigned int offset, int size,
> +                                 unsigned long val)
> +{
> +     int err = 0;
> +
> +     if (size == 2 && offset == dev->msix_cap + PCI_MSIX_FLAGS) {
> +             switch (val & ~PCI_MSIX_FLAGS_QSIZE) {
> +             case PCI_MSIX_FLAGS_ENABLE:
> +             case 0:
> +                     err = uml_vfio_user_update_irqs(&dev->udev);
> +                     break;
> +             }
> +     }
> +
> +     return err;

do you really want to ignore and return 0 if it wasn't supported?

> +static int uml_vfio_update_msix_table(struct uml_vfio_device *dev,
> +                                   unsigned int offset, int size,
> +                                   unsigned long val)
> +{
> +     int index;
> +
> +     offset -= dev->msix_offset + PCI_MSIX_ENTRY_DATA;
> +
> +     if (size != 4 || offset % PCI_MSIX_ENTRY_SIZE != 0)
> +             return 0;

here too?

> +static void uml_vfio_cfgspace_write(struct um_pci_device *pdev,
> +                                 unsigned int offset, int size,
> +                                 unsigned long val)
> +{
> +     struct uml_vfio_device *dev = to_vdev(pdev);
> +
> +     if (offset < dev->msix_cap + PCI_CAP_MSIX_SIZEOF &&
> +                     offset + size > dev->msix_cap)

I'd probably indent the second line just to after "if ("?

> +     if (bar == dev->msix_bar && offset + size > dev->msix_offset &&
> +                     offset < dev->msix_offset + dev->msix_size)
> +             WARN_ON(uml_vfio_update_msix_table(dev, offset, size, val));

likewise

> +static u8 uml_vfio_find_capability(struct uml_vfio_device *dev, u8 cap)
> +{
> +     u8 id, pos;
> +     u16 ent;
> +     int ttl = 48;

any particular significance in that value?

> +int uml_vfio_user_setup_iommu(int container)
> +{
> +     unsigned long reserved = uml_reserved - uml_physmem;
> +     struct vfio_iommu_type1_dma_map dma_map = {
> +             .argsz = sizeof(dma_map),
> +             .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
> +             .vaddr = uml_reserved,
> +             .iova = reserved,
> +             .size = physmem_size - reserved,
> +     };

maybe point over to the big comment in vhost_user_set_mem_table() from
virtio_uml.c? Same things apply here I guess.

johannes

Reply via email to