On Fri, Jul 25, 2025 at 02:24:33PM +0000, Mykyta Poturai wrote:
> From: Stewart Hildebrand <stewart.hildebr...@amd.com>
> 
> SR-IOV virtual functions have simplified configuration space such as
> Vendor ID is derived from the physical function and Device ID comes
> from SR-IOV extended capability.
> Emulate those, so we can provide VID/DID pair for guests which use PCI
> passthrough for SR-IOV virtual functions.
> 
> Emulate guest BAR register values based on PF BAR values for VFs.
> This allows creating a guest view of the normal BAR registers and emulates
> the size and properties as it is done during PCI device enumeration by
> the guest.
> 
> Note, that VFs ROM BAR is read-only and is all zeros, but VF may provide
> access to the PFs ROM via emulation and is not implemented.
> 
> Signed-off-by: Stewart Hildebrand <stewart.hildebr...@amd.com>
> Signed-off-by: Mykyta Poturai <mykyta_potu...@epam.com>
> ---
>  xen/drivers/vpci/sriov.c | 119 ++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 118 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/drivers/vpci/sriov.c b/xen/drivers/vpci/sriov.c
> index 640430e3e9..bf8d9763c6 100644
> --- a/xen/drivers/vpci/sriov.c
> +++ b/xen/drivers/vpci/sriov.c
> @@ -39,7 +39,8 @@ static int vf_init_bars(const struct pci_dev *vf_pdev)
>      for ( i = 0; i < PCI_SRIOV_NUM_BARS; i++ )
>      {
>          bars[i].addr = physfn_vf_bars[i].addr + vf_idx * 
> physfn_vf_bars[i].size;
> -        bars[i].guest_addr = bars[i].addr;
> +        if ( pf_pdev->domain == vf_pdev->domain || bars[i].guest_addr == 0 )
> +            bars[i].guest_addr = bars[i].addr;

Wouldn't this better be done based on the owner of the device being
the hardware domain?  This seems a bit ad-hoc and not quite obvious.

>          bars[i].size = physfn_vf_bars[i].size;
>          bars[i].type = physfn_vf_bars[i].type;
>          bars[i].prefetchable = physfn_vf_bars[i].prefetchable;
> @@ -166,6 +167,44 @@ static void cf_check control_write(const struct pci_dev 
> *pdev, unsigned int reg,
>      pci_conf_write16(pdev->sbdf, reg, val);
>  }
>  
> +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
> +static void cf_check vf_cmd_write(const struct pci_dev *pdev, unsigned int 
> reg,
> +                                  uint32_t cmd, void *data)
> +{
> +    struct vpci_header *header = data;
> +
> +    cmd |= PCI_COMMAND_INTX_DISABLE;
> +
> +    header->guest_cmd = cmd;
> +
> +    /*
> +     * Let Dom0 play with all the bits directly except for the memory
> +     * decoding one. Bits that are not allowed for DomU are already
> +     * handled above and by the rsvdp_mask.
> +     */
> +    if ( header->bars_mapped != !!(cmd & PCI_COMMAND_MEMORY) )
> +    {
> +        /*
> +         * Ignore the error. No memory has been added or removed from the p2m
> +         * (because the actual p2m changes are deferred in defer_map) and the
> +         * memory decoding bit has not been changed, so leave everything 
> as-is,
> +         * hoping the guest will realize and try again.
> +         */
> +        map_vf(pdev, cmd);
> +    }
> +    else
> +        pci_conf_write16(pdev->sbdf, reg, cmd);
> +}

This seems to be (mostly) a duplication of the existing cmd_write() in
header.c.  Is there anyway that we could use the same handler and
check for whether the device is a VF for any specific VF handling?

> +
> +static uint32_t cf_check vf_cmd_read(const struct pci_dev *pdev,
> +                                     unsigned int reg, void *data)
> +{
> +    const struct vpci_header *header = data;
> +
> +    return header->guest_cmd;
> +}

This is a verbatim duplicate of guest_cmd_read() in header.c.

> +#endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */
> +
>  static int vf_init_header(struct pci_dev *vf_pdev)
>  {
>      const struct pci_dev *pf_pdev;
> @@ -184,6 +223,84 @@ static int vf_init_header(struct pci_dev *vf_pdev)
>      sriov_pos = pci_find_ext_capability(pf_pdev->sbdf, PCI_EXT_CAP_ID_SRIOV);
>      ctrl = pci_conf_read16(pf_pdev->sbdf, sriov_pos + PCI_SRIOV_CTRL);
>  
> +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
> +    if ( pf_pdev->domain != vf_pdev->domain )
> +    {
> +        uint16_t vid = pci_conf_read16(pf_pdev->sbdf, PCI_VENDOR_ID);
> +        uint16_t did = pci_conf_read16(pf_pdev->sbdf,
> +                                       sriov_pos + PCI_SRIOV_VF_DID);
> +        struct vpci_bar *bars = vf_pdev->vpci->header.bars;
> +        unsigned int i;
> +
> +        rc = vpci_add_register(vf_pdev->vpci, vpci_read_val, NULL,
> +                               PCI_VENDOR_ID, 2, (void *)(uintptr_t)vid);
> +        if ( rc )
> +            return rc;
> +
> +        rc = vpci_add_register(vf_pdev->vpci, vpci_read_val, NULL,
> +                               PCI_DEVICE_ID, 2, (void *)(uintptr_t)did);
> +        if ( rc )
> +            return rc;
> +
> +        /* Hardcode multi-function device bit to 0 */
> +        rc = vpci_add_register(vf_pdev->vpci, vpci_read_val, NULL,
> +                               PCI_HEADER_TYPE, 1,
> +                               (void *)PCI_HEADER_TYPE_NORMAL);
> +        if ( rc )
> +            return rc;
> +
> +        rc = vpci_add_register(vf_pdev->vpci, vpci_hw_read32, NULL,
> +                               PCI_CLASS_REVISION, 4, NULL);
> +        if ( rc )
> +            return rc;
> +
> +        /* VF ROZ is covered by ro_mask */
> +        rc = vpci_add_register_mask(vf_pdev->vpci, vf_cmd_read, vf_cmd_write,
> +                                    PCI_COMMAND, 2, &vf_pdev->vpci->header,
> +                                    PCI_COMMAND_IO | PCI_COMMAND_SPECIAL |
> +                                    PCI_COMMAND_INVALIDATE |
> +                                    PCI_COMMAND_VGA_PALETTE | 
> PCI_COMMAND_WAIT |
> +                                    PCI_COMMAND_FAST_BACK,
> +                                    0,
> +                                    PCI_COMMAND_RSVDP_MASK |
> +                                    PCI_COMMAND_PARITY | PCI_COMMAND_SERR,
> +                                    0);
> +        if ( rc )
> +            return rc;
> +
> +        rc = vpci_init_capability_list(vf_pdev);
> +        if ( rc )
> +            return rc;
> +
> +        for ( i = 0; i < PCI_SRIOV_NUM_BARS; i++ )
> +        {
> +            switch ( pf_pdev->vpci->sriov->vf_bars[i].type )
> +            {
> +            case VPCI_BAR_MEM32:
> +            case VPCI_BAR_MEM64_LO:
> +            case VPCI_BAR_MEM64_HI:
> +                rc = vpci_add_register(vf_pdev->vpci, 
> vpci_guest_mem_bar_read,
> +                                       vpci_guest_mem_bar_write,
> +                                       PCI_BASE_ADDRESS_0 + i * 4, 4, 
> &bars[i]);
> +                if ( rc )
> +                    return rc;
> +                break;
> +            default:
> +                rc = vpci_add_register(vf_pdev->vpci, vpci_read_val, NULL,
> +                                       PCI_BASE_ADDRESS_0 + i * 4, 4,
> +                                       (void *)0);
> +                if ( rc )
> +                    return rc;
> +                break;
> +            }
> +        }
> +
> +        rc = vf_init_bars(vf_pdev);
> +        if ( rc )
> +            return rc;
> +    }
> +#endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */

I think it would be better if the code in sr-iov.c would only deal
with accesses to the SR-IOV capability (so the hardware domain), while
the code to handle SR-IOV VFs was directly added to header.c.

Otherwise the work here will collide from the series by Jiqian that
attempts to clean up the mediation of capabilities done in vPCI:

https://lore.kernel.org/xen-devel/20250728050401.329510-1-jiqian.c...@amd.com/

Specifically see patch #2:

https://lore.kernel.org/xen-devel/20250728050401.329510-3-jiqian.c...@amd.com/

Which introduces vpci_init_capabilities(), and makes vPCI capability
initialization tied to the PCI device having the capability present.

Thanks, Roger.

Reply via email to