On 2025/7/21 23:48, Roger Pau Monné wrote:
> On Fri, Jul 04, 2025 at 03:07:58PM +0800, Jiqian Chen wrote:
>> When vpci fails to initialize a legacy capability of device, it just
>> returns an error and vPCI gets disabled for the whole device.  That
>> most likely renders the device unusable, plus possibly causing issues
>> to Xen itself if guest attempts to program the native MSI or MSI-X
>> capabilities if present.
>>
>> So, add new function to hide legacy capability when initialization
>> fails. And remove the failed legacy capability from the vpci emulated
>> legacy capability list.
>>
>> Signed-off-by: Jiqian Chen <jiqian.c...@amd.com>
>> ---
>> cc: "Roger Pau Monné" <roger....@citrix.com>
>> ---
>> v6->v7 changes:
>> * Change the pointer parameter of vpci_get_register(),
>>   vpci_get_previous_cap_register() and vpci_capability_hide() to be const.
>>
>> v5->v6 changes:
>> * Rename parameter rm to r in vpci_get_register().
>> * Use for loop to compact the code of vpci_get_previous_cap_register().
>> * Rename prev_next_r to prev_r in vpci_capability_hide(().
>> * Add printing when cap init, cleanup and hide fail.
>>
>> v4->v5 changes:
>> * Modify vpci_get_register() to delete some unnecessary check, so that
>>   I don't need to move function vpci_register_cmp().
>> * Rename vpci_capability_mask() to vpci_capability_hide().
>>
>> v3->v4 changes:
>> * Modify the commit message.
>> * In function vpci_get_previous_cap_register(), add an ASSERT_UNREACHABLE() 
>> if offset below 0x40.
>> * Modify vpci_capability_mask() to return error instead of using ASSERT.
>> * Use vpci_remove_register to remove PCI_CAP_LIST_ID register instead of 
>> open code.
>> * Add check "if ( !offset )" in vpci_capability_mask().
>>
>> v2->v3 changes:
>> * Separated from the last version patch "vpci: Hide capability when it fails 
>> to initialize"
>> * Whole implementation changed because last version is wrong.
>>   This version adds a new helper function vpci_get_register() and uses it to 
>> get
>>   target handler and previous handler from vpci->handlers, then remove the 
>> target.
>>
>> v1->v2 changes:
>> * Removed the "priorities" of initializing capabilities since it isn't used 
>> anymore.
>> * Added new function vpci_capability_mask() and vpci_ext_capability_mask() to
>>   remove failed capability from list.
>> * Called vpci_make_msix_hole() in the end of init_msix().
>>
>> Best regards,
>> Jiqian Chen.
>> ---
>>  xen/drivers/vpci/vpci.c | 109 +++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 108 insertions(+), 1 deletion(-)
>>
>> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
>> index e7e5b64f1be4..a91c3d4a1415 100644
>> --- a/xen/drivers/vpci/vpci.c
>> +++ b/xen/drivers/vpci/vpci.c
>> @@ -83,6 +83,88 @@ static int assign_virtual_sbdf(struct pci_dev *pdev)
>>  
>>  #endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */
>>  
>> +static struct vpci_register *vpci_get_register(const struct vpci *vpci,
>> +                                               unsigned int offset,
>> +                                               unsigned int size)
>> +{
>> +    struct vpci_register *r;
>> +
>> +    ASSERT(spin_is_locked(&vpci->lock));
>> +
>> +    list_for_each_entry ( r, &vpci->handlers, node )
>> +    {
>> +        if ( r->offset == offset && r->size == size )
>> +            return r;
>> +
>> +        if ( offset <= r->offset )
>> +            break;
>> +    }
>> +
>> +    return NULL;
>> +}
>> +
>> +static struct vpci_register *vpci_get_previous_cap_register(
>> +    const struct vpci *vpci, unsigned int offset)
>> +{
>> +    uint32_t next;
>> +    struct vpci_register *r;
>> +
>> +    if ( offset < 0x40 )
>> +    {
>> +        ASSERT_UNREACHABLE();
>> +        return NULL;
>> +    }
>> +
>> +    for ( r = vpci_get_register(vpci, PCI_CAPABILITY_LIST, 1); r;
>> +          r = next >= 0x40 ? vpci_get_register(vpci,
>> +                                               next + PCI_CAP_LIST_NEXT, 1)
>> +                           : NULL )
>> +    {
>> +        next = (uint32_t)(uintptr_t)r->private;
> 
> Both 'next' type and the explicit truncation done here would better
> use "unsigned int" to match the type of the input offset parameter?
Make sense, will change.

> 
>> +        ASSERT(next == (uintptr_t)r->private);
>> +        if ( next == offset )
>> +            break;
>> +    }
>> +
>> +    return r;
>> +}
>> +
>> +static int vpci_capability_hide(const struct pci_dev *pdev, unsigned int 
>> cap)
>> +{
>> +    const unsigned int offset = pci_find_cap_offset(pdev->sbdf, cap);
>> +    struct vpci_register *prev_r, *next_r;
>> +    struct vpci *vpci = pdev->vpci;
>> +
>> +    if ( !offset )
>> +    {
>> +        ASSERT_UNREACHABLE();
>> +        return 0;
>> +    }
>> +
>> +    spin_lock(&vpci->lock);
>> +    prev_r = vpci_get_previous_cap_register(vpci, offset);
>> +    next_r = vpci_get_register(vpci, offset + PCI_CAP_LIST_NEXT, 1);
>> +    if ( !prev_r || !next_r )
>> +    {
>> +        spin_unlock(&vpci->lock);
>> +        return -ENODEV;
>> +    }
>> +
>> +    prev_r->private = next_r->private;
>> +    /*
>> +     * Not calling vpci_remove_register() here is to avoid redoing
>> +     * the register search
>> +     */
>> +    list_del(&next_r->node);
>> +    spin_unlock(&vpci->lock);
>> +    xfree(next_r);
>> +
>> +    if ( !is_hardware_domain(pdev->domain) )
>> +        return vpci_remove_register(vpci, offset + PCI_CAP_LIST_ID, 1);
>> +
>> +    return 0;
>> +}
>> +
>>  static int vpci_init_capabilities(struct pci_dev *pdev)
>>  {
>>      for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ )
>> @@ -103,7 +185,32 @@ static int vpci_init_capabilities(struct pci_dev *pdev)
>>  
>>          rc = capability->init(pdev);
>>          if ( rc )
>> -            return rc;
>> +        {
>> +            const char *type = is_ext ? "extended" : "legacy";
>> +
>> +            printk(XENLOG_WARNING "%pd %pp: init %s cap %u fail rc=%d, mask 
>> it\n",
> 
> Nit: in order to avoid the strictly speaking overly long line here you
> could split it like:
> 
>             printk(XENLOG_WARNING
>                    "%pd %pp: init %s cap %u fail rc=%d, mask it\n",
>                    pdev->domain, ...
Will do.
> 
>> +                   pdev->domain, &pdev->sbdf, type, cap, rc);
>> +
>> +            if ( capability->cleanup )
>> +            {
>> +                rc = capability->cleanup(pdev);
>> +                if ( rc )
>> +                {
>> +                    printk(XENLOG_ERR "%pd %pp: clean %s cap %u fail 
>> rc=%d\n",
>> +                           pdev->domain, &pdev->sbdf, type, cap, rc);
> 
> I think it would be fine to not return error here for the hardware
> domain, and try to mask the capability even if cleanup() has failed?
> 
> For the hardware domain it's likely better to not fail and attempt to
> mask, rather than fail and then end up exposing the device without any
> kind of vPCI mediation.  For domU the proposed behavior is fine.
Got it.
So here should be?
                if ( rc && !is_hardware_domain(pdev->domain) )
                {
                    printk(XENLOG_ERR "%pd %pp: clean %s cap %u fail rc=%d\n",
                           pdev->domain, &pdev->sbdf, type, cap, rc);
                    return rc;
                }
or
                if ( rc )
                {
                    printk(XENLOG_ERR "%pd %pp: clean %s cap %u fail rc=%d\n",
                           pdev->domain, &pdev->sbdf, type, cap, rc);
                    if ( !is_hardware_domain(pdev->domain) )
                        return rc;
                }

> 
> Thanks, Roger.

-- 
Best regards,
Jiqian Chen.

Reply via email to