On 2015/02/11 13:53, Tetsuya Mukawa wrote:
> On 2015/02/11 12:27, Qiu, Michael wrote:
>> On 2/10/2015 11:11 PM, Iremonger, Bernard wrote:
>>>> -----Original Message-----
>>>> From: Qiu, Michael
>>>> Sent: Monday, February 9, 2015 1:10 PM
>>>> To: Tetsuya Mukawa; dev at dpdk.org
>>>> Cc: Iremonger, Bernard
>>>> Subject: Re: [PATCH v7 04/14] eal/pci: Consolidate pci address comparison 
>>>> APIs
>>>>
>>>> On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote:
>>>>> This patch replaces pci_addr_comparison() and memcmp() of pci
>>>>> addresses by eal_compare_pci_addr().
>>>>>
>>>>> v5:
>>>>> - Fix pci_scan_one to handle pt_driver correctly.
>>>>> v4:
>>>>> - Fix calculation method of eal_compare_pci_addr().
>>>>> - Add parameter checking.
>>>>>
>>>>> Signed-off-by: Tetsuya Mukawa <mukawa at igel.co.jp>
>>>>> ---
>>>>>  lib/librte_eal/bsdapp/eal/eal_pci.c       | 25 ++++++++---------------
>>>>>  lib/librte_eal/common/eal_common_pci.c    |  2 +-
>>>>>  lib/librte_eal/common/include/rte_pci.h   | 34 
>>>>> +++++++++++++++++++++++++++++++
>>>>>  lib/librte_eal/linuxapp/eal/eal_pci.c     | 25 ++++++++---------------
>>>>>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c |  2 +-
>>>>>  5 files changed, 54 insertions(+), 34 deletions(-)
>>>>>
>>>>> diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>> b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>> index 74ecce7..c844d58 100644
>>>>> --- a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>> +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>> @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>>>>>   return (0);
>>>>>  }
>>>>>
>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>> *addr2) -{
>>>>> - uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + 
>>>>> (addr->devid << 8) + addr-
>>>>> function;
>>>>> - uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + 
>>>>> (addr2->devid << 8) +
>>>> addr2->function;
>>>>> -
>>>>> - if (dev_addr > dev_addr2)
>>>>> -         return 1;
>>>>> - else
>>>>> -         return 0;
>>>>> -}
>>>>> -
>>>>> -
>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>> static int  pci_scan_one(int dev_pci_fd, struct pci_conf *conf) @@
>>>>> -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
>>>>>   }
>>>>>   else {
>>>>>           struct rte_pci_device *dev2 = NULL;
>>>>> +         int ret;
>>>>>
>>>>>           TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>> -                 if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>>> +                 ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>>> +                 if (ret > 0)
>>>>>                           continue;
>>>>> -                 else {
>>>>> +                 else if (ret < 0) {
>>>>>                           TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>                           return 0;
>>>>> +                 } else { /* already registered */
>>>>> +                         /* update pt_driver */
>>>>> +                         dev2->pt_driver = dev->pt_driver;
>>>>> +                         free(dev);
>>>>> +                         return 0;
>>>>>                   }
>>>>>           }
>>>>>           TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>>> a/lib/librte_eal/common/eal_common_pci.c
>>>>> b/lib/librte_eal/common/eal_common_pci.c
>>>>> index f3c7f71..a89f5c3 100644
>>>>> --- a/lib/librte_eal/common/eal_common_pci.c
>>>>> +++ b/lib/librte_eal/common/eal_common_pci.c
>>>>> @@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct 
>>>>> rte_pci_device *dev)
>>>>>           if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
>>>>>                   devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
>>>>>                   continue;
>>>>> -         if (!memcmp(&dev->addr, &devargs->pci.addr, sizeof(dev->addr)))
>>>>> +         if (!eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
>>>>>                   return devargs;
>>>>>   }
>>>>>   return NULL;
>>>>> diff --git a/lib/librte_eal/common/include/rte_pci.h
>>>>> b/lib/librte_eal/common/include/rte_pci.h
>>>>> index 7f2d699..4814cd7 100644
>>>>> --- a/lib/librte_eal/common/include/rte_pci.h
>>>>> +++ b/lib/librte_eal/common/include/rte_pci.h
>>>>> @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct
>>>>> rte_pci_addr *dev_addr)  }  #undef GET_PCIADDR_FIELD
>>>>>
>>>>> +/* Compare two PCI device addresses. */
>>>>> +/**
>>>>> + * Utility function to compare two PCI device addresses.
>>>>> + *
>>>>> + * @param addr
>>>>> + *       The PCI Bus-Device-Function address to compare
>>>>> + * @param addr2
>>>>> + *       The PCI Bus-Device-Function address to compare
>>>>> + * @return
>>>>> + *       0 on equal PCI address.
>>>>> + *       Positive on addr is greater than addr2.
>>>>> + *       Negative on addr is less than addr2, or error.
>>>>> + */
>>>>> +static inline int
>>>>> +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>> +*addr2) {
>>>>> + uint64_t dev_addr, dev_addr2;
>>>>> +
>>>>> + if ((addr == NULL) || (addr2 == NULL))
>>>>> +         return -1;
>>>>> +
>>>>> + dev_addr = (addr->domain << 24) | (addr->bus << 16) |
>>>>> +                         (addr->devid << 8) | addr->function;
>>>>> + dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
>>>>> +                         (addr2->devid << 8) | addr2->function;
>>>>> +
>>>>> + if (dev_addr > dev_addr2)
>>>>> +         return 1;
>>>>> + else if (dev_addr < dev_addr2)
>>>>> +         return -1;
>>>>> + else
>>>>> +         return 0;
>>>>> +}
>>>>> +
>>>>>  /**
>>>>>   * Probe the PCI bus for registered drivers.
>>>>>   *
>>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>> index c0ca5a5..d847102 100644
>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>> @@ -229,20 +229,6 @@ error:
>>>>>   return -1;
>>>>>  }
>>>>>
>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>> *addr2) -{
>>>>> - uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + 
>>>>> (addr->devid << 8) + addr-
>>>>> function;
>>>>> - uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + 
>>>>> (addr2->devid << 8) +
>>>> addr2->function;
>>>>> -
>>>>> - if (dev_addr > dev_addr2)
>>>>> -         return 1;
>>>>> - else
>>>>> -         return 0;
>>>>> -}
>>>>> -
>>>>> -
>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>> static int  pci_scan_one(const char *dirname, uint16_t domain, uint8_t
>>>>> bus, @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t
>>>>> domain, uint8_t bus,
>>>>>   }
>>>>>   else {
>>>>>           struct rte_pci_device *dev2 = NULL;
>>>>> +         int ret;
>>>>>
>>>>>           TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>> -                 if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>>> +                 ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>>> +                 if (ret > 0)
>>>>>                           continue;
>>>>> -                 else {
>>>>> +                 else if (ret < 0) {
>>>>>                           TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>                           return 0;
>>>>> +                 } else { /* already registered */
>>>>> +                         /* update pt_driver */
>>>>> +                         dev2->pt_driver = dev->pt_driver;
>>> Hi Tetsuya,
>>>
>>> I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs is 
>>> being lost in some scenarios.
>>> The following line should be added here:
>>>       dev2->max_vfs = dev->max_vfs;
>>>
>>> numa_mode should probably be updated too (although it is not causing a 
>>> problem at present).
>>>       dev2->numa_mode = dev->numa_mode;
>> I'm very curious, why those field miss? I haven't see any places clear
>> this field.
>>
>> What is the root cause?
> Hi Michael,
>
> Here is my guess.
> The above function creates pci device list.

I am sorry. I forgot to add below information.

"max_vfs" or "numa_node" value is came from sysfs when the above
function is processed.

> And current DPDK implementation assumes all devices needed to be managed
> are under igb_uio or vfio when above code is processed.
> To add hotplug function, we also need to think some devices will start
> to be managed under igb_uio or vfio after initializing pci device list.
> Anyway, I guess "max_vfs" value will be changed when igb_uio or vfio
> manages the device.
>
> Hi Bernard,
>
> Could you please check "max_vfs" and "num_node" values, then check the
> values again after the device is managed by igb_uio or vfio?
> In my environment, it seems max_vfs is created by igb_uio.
> But my NIC doesn't have VF, so behavior might be different in your
> environment.
> I guess "numa_node" should not be changed theoretically.
>
> If my guess is correct, how about replacing following values?
> - driver
> - max_vfs
> - resource
> - (numa_node)
> Except for above value, I guess other value shouldn't be changed even
> after the device is managed by igb_uio or vfio.
>
> Thanks,
> Tetsuya
>
>> Thanks,
>> Michael
>>
>>> Regards,
>>>
>>> Bernard.
>>>
>>>
>>>
>>>
>>>>> +                         free(dev);
>>>>> +                         return 0;
>>>>>                   }
>>>>>           }
>>>>>           TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>>> a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>> index e53f06b..1da3507 100644
>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>> @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>>>>>   TAILQ_FOREACH(uio_res, pci_res_list, next) {
>>>>>
>>>>>           /* skip this element if it doesn't match our PCI address */
>>>>> -         if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
>>>>> +         if (eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
>>>>>                   continue;
>>>>>
>>>>>           for (i = 0; i != uio_res->nb_maps; i++) {
>>>> Acked-by: Michael Qiu <michael.qiu at intel.com>
>

Reply via email to