On 2/11/2015 4:14 PM, Tetsuya Mukawa wrote:
> On 2015/02/11 15:29, Qiu, Michael wrote:
>> On 2/11/2015 12:57 PM, Tetsuya Mukawa wrote:
>>> On 2015/02/11 13:53, Tetsuya Mukawa wrote:
>>>> On 2015/02/11 12:27, Qiu, Michael wrote:
>>>>> On 2/10/2015 11:11 PM, Iremonger, Bernard wrote:
>>>>>>> -----Original Message-----
>>>>>>> From: Qiu, Michael
>>>>>>> Sent: Monday, February 9, 2015 1:10 PM
>>>>>>> To: Tetsuya Mukawa; dev at dpdk.org
>>>>>>> Cc: Iremonger, Bernard
>>>>>>> Subject: Re: [PATCH v7 04/14] eal/pci: Consolidate pci address 
>>>>>>> comparison APIs
>>>>>>>
>>>>>>> On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote:
>>>>>>>> This patch replaces pci_addr_comparison() and memcmp() of pci
>>>>>>>> addresses by eal_compare_pci_addr().
>>>>>>>>
>>>>>>>> v5:
>>>>>>>> - Fix pci_scan_one to handle pt_driver correctly.
>>>>>>>> v4:
>>>>>>>> - Fix calculation method of eal_compare_pci_addr().
>>>>>>>> - Add parameter checking.
>>>>>>>>
>>>>>>>> Signed-off-by: Tetsuya Mukawa <mukawa at igel.co.jp>
>>>>>>>> ---
>>>>>>>>  lib/librte_eal/bsdapp/eal/eal_pci.c       | 25 ++++++++---------------
>>>>>>>>  lib/librte_eal/common/eal_common_pci.c    |  2 +-
>>>>>>>>  lib/librte_eal/common/include/rte_pci.h   | 34 
>>>>>>>> +++++++++++++++++++++++++++++++
>>>>>>>>  lib/librte_eal/linuxapp/eal/eal_pci.c     | 25 ++++++++---------------
>>>>>>>>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c |  2 +-
>>>>>>>>  5 files changed, 54 insertions(+), 34 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>>> b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>>> index 74ecce7..c844d58 100644
>>>>>>>> --- a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>>> +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>>> @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>>>>>>>>        return (0);
>>>>>>>>  }
>>>>>>>>
>>>>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>>> *addr2) -{
>>>>>>>> -      uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + 
>>>>>>>> (addr->devid << 8) + addr-
>>>>>>>> function;
>>>>>>>> -      uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) 
>>>>>>>> + (addr2->devid << 8) +
>>>>>>> addr2->function;
>>>>>>>> -
>>>>>>>> -      if (dev_addr > dev_addr2)
>>>>>>>> -              return 1;
>>>>>>>> -      else
>>>>>>>> -              return 0;
>>>>>>>> -}
>>>>>>>> -
>>>>>>>> -
>>>>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>>>>> static int  pci_scan_one(int dev_pci_fd, struct pci_conf *conf) @@
>>>>>>>> -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
>>>>>>>>        }
>>>>>>>>        else {
>>>>>>>>                struct rte_pci_device *dev2 = NULL;
>>>>>>>> +              int ret;
>>>>>>>>
>>>>>>>>                TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>>>>> -                      if (pci_addr_comparison(&dev->addr, 
>>>>>>>> &dev2->addr))
>>>>>>>> +                      ret = eal_compare_pci_addr(&dev->addr, 
>>>>>>>> &dev2->addr);
>>>>>>>> +                      if (ret > 0)
>>>>>>>>                                continue;
>>>>>>>> -                      else {
>>>>>>>> +                      else if (ret < 0) {
>>>>>>>>                                TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>>>>                                return 0;
>>>>>>>> +                      } else { /* already registered */
>>>>>>>> +                              /* update pt_driver */
>>>>>>>> +                              dev2->pt_driver = dev->pt_driver;
>>>>>>>> +                              free(dev);
>>>>>>>> +                              return 0;
>>>>>>>>                        }
>>>>>>>>                }
>>>>>>>>                TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff 
>>>>>>>> --git
>>>>>>>> a/lib/librte_eal/common/eal_common_pci.c
>>>>>>>> b/lib/librte_eal/common/eal_common_pci.c
>>>>>>>> index f3c7f71..a89f5c3 100644
>>>>>>>> --- a/lib/librte_eal/common/eal_common_pci.c
>>>>>>>> +++ b/lib/librte_eal/common/eal_common_pci.c
>>>>>>>> @@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct 
>>>>>>>> rte_pci_device *dev)
>>>>>>>>                if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
>>>>>>>>                        devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
>>>>>>>>                        continue;
>>>>>>>> -              if (!memcmp(&dev->addr, &devargs->pci.addr, 
>>>>>>>> sizeof(dev->addr)))
>>>>>>>> +              if (!eal_compare_pci_addr(&dev->addr, 
>>>>>>>> &devargs->pci.addr))
>>>>>>>>                        return devargs;
>>>>>>>>        }
>>>>>>>>        return NULL;
>>>>>>>> diff --git a/lib/librte_eal/common/include/rte_pci.h
>>>>>>>> b/lib/librte_eal/common/include/rte_pci.h
>>>>>>>> index 7f2d699..4814cd7 100644
>>>>>>>> --- a/lib/librte_eal/common/include/rte_pci.h
>>>>>>>> +++ b/lib/librte_eal/common/include/rte_pci.h
>>>>>>>> @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct
>>>>>>>> rte_pci_addr *dev_addr)  }  #undef GET_PCIADDR_FIELD
>>>>>>>>
>>>>>>>> +/* Compare two PCI device addresses. */
>>>>>>>> +/**
>>>>>>>> + * Utility function to compare two PCI device addresses.
>>>>>>>> + *
>>>>>>>> + * @param addr
>>>>>>>> + *    The PCI Bus-Device-Function address to compare
>>>>>>>> + * @param addr2
>>>>>>>> + *    The PCI Bus-Device-Function address to compare
>>>>>>>> + * @return
>>>>>>>> + *    0 on equal PCI address.
>>>>>>>> + *    Positive on addr is greater than addr2.
>>>>>>>> + *    Negative on addr is less than addr2, or error.
>>>>>>>> + */
>>>>>>>> +static inline int
>>>>>>>> +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>>> +*addr2) {
>>>>>>>> +      uint64_t dev_addr, dev_addr2;
>>>>>>>> +
>>>>>>>> +      if ((addr == NULL) || (addr2 == NULL))
>>>>>>>> +              return -1;
>>>>>>>> +
>>>>>>>> +      dev_addr = (addr->domain << 24) | (addr->bus << 16) |
>>>>>>>> +                              (addr->devid << 8) | addr->function;
>>>>>>>> +      dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
>>>>>>>> +                              (addr2->devid << 8) | addr2->function;
>>>>>>>> +
>>>>>>>> +      if (dev_addr > dev_addr2)
>>>>>>>> +              return 1;
>>>>>>>> +      else if (dev_addr < dev_addr2)
>>>>>>>> +              return -1;
>>>>>>>> +      else
>>>>>>>> +              return 0;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>  /**
>>>>>>>>   * Probe the PCI bus for registered drivers.
>>>>>>>>   *
>>>>>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>>> index c0ca5a5..d847102 100644
>>>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>>> @@ -229,20 +229,6 @@ error:
>>>>>>>>        return -1;
>>>>>>>>  }
>>>>>>>>
>>>>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>>> *addr2) -{
>>>>>>>> -      uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + 
>>>>>>>> (addr->devid << 8) + addr-
>>>>>>>> function;
>>>>>>>> -      uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) 
>>>>>>>> + (addr2->devid << 8) +
>>>>>>> addr2->function;
>>>>>>>> -
>>>>>>>> -      if (dev_addr > dev_addr2)
>>>>>>>> -              return 1;
>>>>>>>> -      else
>>>>>>>> -              return 0;
>>>>>>>> -}
>>>>>>>> -
>>>>>>>> -
>>>>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>>>>> static int  pci_scan_one(const char *dirname, uint16_t domain, uint8_t
>>>>>>>> bus, @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t
>>>>>>>> domain, uint8_t bus,
>>>>>>>>        }
>>>>>>>>        else {
>>>>>>>>                struct rte_pci_device *dev2 = NULL;
>>>>>>>> +              int ret;
>>>>>>>>
>>>>>>>>                TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>>>>> -                      if (pci_addr_comparison(&dev->addr, 
>>>>>>>> &dev2->addr))
>>>>>>>> +                      ret = eal_compare_pci_addr(&dev->addr, 
>>>>>>>> &dev2->addr);
>>>>>>>> +                      if (ret > 0)
>>>>>>>>                                continue;
>>>>>>>> -                      else {
>>>>>>>> +                      else if (ret < 0) {
>>>>>>>>                                TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>>>>                                return 0;
>>>>>>>> +                      } else { /* already registered */
>>>>>>>> +                              /* update pt_driver */
>>>>>>>> +                              dev2->pt_driver = dev->pt_driver;
>>>>>> Hi Tetsuya,
>>>>>>
>>>>>> I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs 
>>>>>> is being lost in some scenarios.
>>>>>> The following line should be added here:
>>>>>>       dev2->max_vfs = dev->max_vfs;
>>>>>>
>>>>>> numa_node should probably be updated too (although it is not causing a 
>>>>>> problem at present).
>>>>>>       dev2->numa_node = dev->numa_node;
>>>>> I'm very curious: why are those fields lost? I haven't seen any place that
>>>>> clears these fields.
>>>>>
>>>>> What is the root cause?
>>>> Hi Michael,
>>>>
>>>> Here is my guess.
>>>> The above function creates pci device list.
>>> I am sorry. I forgot to add below information.
>>>
>>> The "max_vfs" or "numa_node" value comes from sysfs when the above
>>> function is processed.
>> Yes, but it has already been registered, so why is it missing?
> Yes, it has been registered already, but probably should be updated.
> I guess sysfs value will be changed when igb_uio starts managing the device.
>
> ex)
> 1. Boot linux
> 2. start a dpdk application with no port.
> 3. pci device list is registered.
>  - Here, "max_vfs" comes from sysfs. Or there is no such entry.
> 4. igb_uio binds the device.
> 5.  I guess the max_vfs value in sysfs is changed. Or the max_vfs entry is created.
> 6. The dpdk application calls hotplug function.

Yes, agree.

But can the numa node be changed?

Bernard, does your issue occur after max_vfs changed in igb_uio?

If not, I think we must figure out the reason.

Thanks,
Michael
>  - Here, I guess we need to update "max_vfs" value.
>
> The above is just my assumption.
> It may be good to wait for Bernard's reply.
>
> Thanks,
> Tetsuya
>
>> Thanks,
>> Michael
>>>> And current DPDK implementation assumes all devices needed to be managed
>>>> are under igb_uio or vfio when above code is processed.
>>>> To add hotplug function, we also need to think some devices will start
>>>> to be managed under igb_uio or vfio after initializing pci device list.
>>>> Anyway, I guess "max_vfs" value will be changed when igb_uio or vfio
>>>> manages the device.
>>>>
>>>> Hi Bernard,
>>>>
>>>> Could you please check "max_vfs" and "numa_node" values, then check the
>>>> values again after the device is managed by igb_uio or vfio?
>>>> In my environment, it seems max_vfs is created by igb_uio.
>>>> But my NIC doesn't have VF, so behavior might be different in your
>>>> environment.
>>>> I guess "numa_node" should not be changed theoretically.
>>>>
>>>> If my guess is correct, how about replacing following values?
>>>> - driver
>>>> - max_vfs
>>>> - resource
>>>> - (numa_node)
>>>> Except for the above values, I guess other values shouldn't be changed even
>>>> after the device is managed by igb_uio or vfio.
>>>>
>>>> Thanks,
>>>> Tetsuya
>>>>
>>>>> Thanks,
>>>>> Michael
>>>>>
>>>>>> Regards,
>>>>>>
>>>>>> Bernard.
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>>>> +                              free(dev);
>>>>>>>> +                              return 0;
>>>>>>>>                        }
>>>>>>>>                }
>>>>>>>>                TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff 
>>>>>>>> --git
>>>>>>>> a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>>> index e53f06b..1da3507 100644
>>>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>>> @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>>>>>>>>        TAILQ_FOREACH(uio_res, pci_res_list, next) {
>>>>>>>>
>>>>>>>>                /* skip this element if it doesn't match our PCI 
>>>>>>>> address */
>>>>>>>> -              if (memcmp(&uio_res->pci_addr, &dev->addr, 
>>>>>>>> sizeof(dev->addr)))
>>>>>>>> +              if (eal_compare_pci_addr(&uio_res->pci_addr, 
>>>>>>>> &dev->addr))
>>>>>>>>                        continue;
>>>>>>>>
>>>>>>>>                for (i = 0; i != uio_res->nb_maps; i++) {
>>>>>>> Acked-by: Michael Qiu <michael.qiu at intel.com>
>
>

Reply via email to