On 2/11/2015 4:14 PM, Tetsuya Mukawa wrote: > On 2015/02/11 15:29, Qiu, Michael wrote: >> On 2/11/2015 12:57 PM, Tetsuya Mukawa wrote: >>> On 2015/02/11 13:53, Tetsuya Mukawa wrote: >>>> On 2015/02/11 12:27, Qiu, Michael wrote: >>>>> On 2/10/2015 11:11 PM, Iremonger, Bernard wrote: >>>>>>> -----Original Message----- >>>>>>> From: Qiu, Michael >>>>>>> Sent: Monday, February 9, 2015 1:10 PM >>>>>>> To: Tetsuya Mukawa; dev at dpdk.org >>>>>>> Cc: Iremonger, Bernard >>>>>>> Subject: Re: [PATCH v7 04/14] eal/pci: Consolidate pci address >>>>>>> comparison APIs >>>>>>> >>>>>>> On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote: >>>>>>>> This patch replaces pci_addr_comparison() and memcmp() of pci >>>>>>>> addresses by eal_compare_pci_addr(). >>>>>>>> >>>>>>>> v5: >>>>>>>> - Fix pci_scan_one to handle pt_driver correctly. >>>>>>>> v4: >>>>>>>> - Fix calculation method of eal_compare_pci_addr(). >>>>>>>> - Add parameter checking. >>>>>>>> >>>>>>>> Signed-off-by: Tetsuya Mukawa <mukawa at igel.co.jp> >>>>>>>> --- >>>>>>>> lib/librte_eal/bsdapp/eal/eal_pci.c | 25 ++++++++--------------- >>>>>>>> lib/librte_eal/common/eal_common_pci.c | 2 +- >>>>>>>> lib/librte_eal/common/include/rte_pci.h | 34 >>>>>>>> +++++++++++++++++++++++++++++++ >>>>>>>> lib/librte_eal/linuxapp/eal/eal_pci.c | 25 ++++++++--------------- >>>>>>>> lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 2 +- >>>>>>>> 5 files changed, 54 insertions(+), 34 deletions(-) >>>>>>>> >>>>>>>> diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c >>>>>>>> b/lib/librte_eal/bsdapp/eal/eal_pci.c >>>>>>>> index 74ecce7..c844d58 100644 >>>>>>>> --- a/lib/librte_eal/bsdapp/eal/eal_pci.c >>>>>>>> +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c >>>>>>>> @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev) >>>>>>>> return (0); >>>>>>>> } >>>>>>>> >>>>>>>> -/* Compare two PCI device addresses. 
*/ -static int >>>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr >>>>>>>> *addr2) -{ >>>>>>>> - uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + >>>>>>>> (addr->devid << 8) + addr- >>>>>>>> function; >>>>>>>> - uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) >>>>>>>> + (addr2->devid << 8) + >>>>>>> addr2->function; >>>>>>>> - >>>>>>>> - if (dev_addr > dev_addr2) >>>>>>>> - return 1; >>>>>>>> - else >>>>>>>> - return 0; >>>>>>>> -} >>>>>>>> - >>>>>>>> - >>>>>>>> /* Scan one pci sysfs entry, and fill the devices list from it. */ >>>>>>>> static int pci_scan_one(int dev_pci_fd, struct pci_conf *conf) @@ >>>>>>>> -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) >>>>>>>> } >>>>>>>> else { >>>>>>>> struct rte_pci_device *dev2 = NULL; >>>>>>>> + int ret; >>>>>>>> >>>>>>>> TAILQ_FOREACH(dev2, &pci_device_list, next) { >>>>>>>> - if (pci_addr_comparison(&dev->addr, >>>>>>>> &dev2->addr)) >>>>>>>> + ret = eal_compare_pci_addr(&dev->addr, >>>>>>>> &dev2->addr); >>>>>>>> + if (ret > 0) >>>>>>>> continue; >>>>>>>> - else { >>>>>>>> + else if (ret < 0) { >>>>>>>> TAILQ_INSERT_BEFORE(dev2, dev, next); >>>>>>>> return 0; >>>>>>>> + } else { /* already registered */ >>>>>>>> + /* update pt_driver */ >>>>>>>> + dev2->pt_driver = dev->pt_driver; >>>>>>>> + free(dev); >>>>>>>> + return 0; >>>>>>>> } >>>>>>>> } >>>>>>>> TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff >>>>>>>> --git >>>>>>>> a/lib/librte_eal/common/eal_common_pci.c >>>>>>>> b/lib/librte_eal/common/eal_common_pci.c >>>>>>>> index f3c7f71..a89f5c3 100644 >>>>>>>> --- a/lib/librte_eal/common/eal_common_pci.c >>>>>>>> +++ b/lib/librte_eal/common/eal_common_pci.c >>>>>>>> @@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct >>>>>>>> rte_pci_device *dev) >>>>>>>> if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI && >>>>>>>> devargs->type != RTE_DEVTYPE_WHITELISTED_PCI) >>>>>>>> continue; >>>>>>>> - if (!memcmp(&dev->addr, 
&devargs->pci.addr, >>>>>>>> sizeof(dev->addr))) >>>>>>>> + if (!eal_compare_pci_addr(&dev->addr, >>>>>>>> &devargs->pci.addr)) >>>>>>>> return devargs; >>>>>>>> } >>>>>>>> return NULL; >>>>>>>> diff --git a/lib/librte_eal/common/include/rte_pci.h >>>>>>>> b/lib/librte_eal/common/include/rte_pci.h >>>>>>>> index 7f2d699..4814cd7 100644 >>>>>>>> --- a/lib/librte_eal/common/include/rte_pci.h >>>>>>>> +++ b/lib/librte_eal/common/include/rte_pci.h >>>>>>>> @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct >>>>>>>> rte_pci_addr *dev_addr) } #undef GET_PCIADDR_FIELD >>>>>>>> >>>>>>>> +/* Compare two PCI device addresses. */ >>>>>>>> +/** >>>>>>>> + * Utility function to compare two PCI device addresses. >>>>>>>> + * >>>>>>>> + * @param addr >>>>>>>> + * The PCI Bus-Device-Function address to compare >>>>>>>> + * @param addr2 >>>>>>>> + * The PCI Bus-Device-Function address to compare >>>>>>>> + * @return >>>>>>>> + * 0 on equal PCI address. >>>>>>>> + * Positive on addr is greater than addr2. >>>>>>>> + * Negative on addr is less than addr2, or error. >>>>>>>> + */ >>>>>>>> +static inline int >>>>>>>> +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr >>>>>>>> +*addr2) { >>>>>>>> + uint64_t dev_addr, dev_addr2; >>>>>>>> + >>>>>>>> + if ((addr == NULL) || (addr2 == NULL)) >>>>>>>> + return -1; >>>>>>>> + >>>>>>>> + dev_addr = (addr->domain << 24) | (addr->bus << 16) | >>>>>>>> + (addr->devid << 8) | addr->function; >>>>>>>> + dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) | >>>>>>>> + (addr2->devid << 8) | addr2->function; >>>>>>>> + >>>>>>>> + if (dev_addr > dev_addr2) >>>>>>>> + return 1; >>>>>>>> + else if (dev_addr < dev_addr2) >>>>>>>> + return -1; >>>>>>>> + else >>>>>>>> + return 0; >>>>>>>> +} >>>>>>>> + >>>>>>>> /** >>>>>>>> * Probe the PCI bus for registered drivers. 
>>>>>>>> * >>>>>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c >>>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c >>>>>>>> index c0ca5a5..d847102 100644 >>>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c >>>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c >>>>>>>> @@ -229,20 +229,6 @@ error: >>>>>>>> return -1; >>>>>>>> } >>>>>>>> >>>>>>>> -/* Compare two PCI device addresses. */ -static int >>>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr >>>>>>>> *addr2) -{ >>>>>>>> - uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + >>>>>>>> (addr->devid << 8) + addr- >>>>>>>> function; >>>>>>>> - uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) >>>>>>>> + (addr2->devid << 8) + >>>>>>> addr2->function; >>>>>>>> - >>>>>>>> - if (dev_addr > dev_addr2) >>>>>>>> - return 1; >>>>>>>> - else >>>>>>>> - return 0; >>>>>>>> -} >>>>>>>> - >>>>>>>> - >>>>>>>> /* Scan one pci sysfs entry, and fill the devices list from it. */ >>>>>>>> static int pci_scan_one(const char *dirname, uint16_t domain, uint8_t >>>>>>>> bus, @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t >>>>>>>> domain, uint8_t bus, >>>>>>>> } >>>>>>>> else { >>>>>>>> struct rte_pci_device *dev2 = NULL; >>>>>>>> + int ret; >>>>>>>> >>>>>>>> TAILQ_FOREACH(dev2, &pci_device_list, next) { >>>>>>>> - if (pci_addr_comparison(&dev->addr, >>>>>>>> &dev2->addr)) >>>>>>>> + ret = eal_compare_pci_addr(&dev->addr, >>>>>>>> &dev2->addr); >>>>>>>> + if (ret > 0) >>>>>>>> continue; >>>>>>>> - else { >>>>>>>> + else if (ret < 0) { >>>>>>>> TAILQ_INSERT_BEFORE(dev2, dev, next); >>>>>>>> return 0; >>>>>>>> + } else { /* already registered */ >>>>>>>> + /* update pt_driver */ >>>>>>>> + dev2->pt_driver = dev->pt_driver; >>>>>> Hi Tetsuya, >>>>>> >>>>>> I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs >>>>>> is being lost in some scenarios. 
>>>>>> The following line should be added here: >>>>>> dev2->max_vfs = dev->max_vfs; >>>>>> >>>>>> numa_node should probably be updated too (although it is not causing a >>>>>> problem at present). >>>>>> dev2->numa_node = dev->numa_node; >>>>> I'm very curious, why are those fields missing? I haven't seen any place that clears >>>>> this field. >>>>> >>>>> What is the root cause? >>>> Hi Michael, >>>> >>>> Here is my guess. >>>> The above function creates the pci device list. >>> I am sorry. I forgot to add the information below. >>> >>> The "max_vfs" or "numa_node" value comes from sysfs when the above >>> function is processed. >> Yes, but it has already been registered, so why is it missing? > Yes, it has been registered already, but probably should be updated. > I guess the sysfs value will be changed when igb_uio starts managing the device. > > ex) > 1. Boot linux > 2. start a dpdk application with no port. > 3. pci device list is registered. > - Here, "max_vfs" comes from sysfs. Or there is no such entry. > 4. igb_uio binds the device. > 5. I guess max_vfs value of sysfs is changed. Or max_vfs entry is created. > 6. The dpdk application calls hotplug function.
Yes, agreed. But can the NUMA node be changed? Bernard, does your issue occur after max_vfs is changed in igb_uio? If not, I think we must figure out the reason. Thanks, Michael > - Here, I guess we need to update "max_vfs" value. > > Above is just my assumption. > It may be good to wait for Bernard's reply. > > Thanks, > Tetsuya > >> Thanks, >> Michael >>>> And the current DPDK implementation assumes all devices that need to be managed >>>> are under igb_uio or vfio when the above code is processed. >>>> To add the hotplug function, we also need to consider that some devices will start >>>> to be managed under igb_uio or vfio after initializing the pci device list. >>>> Anyway, I guess the "max_vfs" value will be changed when igb_uio or vfio >>>> manages the device. >>>> >>>> Hi Bernard, >>>> >>>> Could you please check "max_vfs" and "numa_node" values, then check the >>>> values again after the device is managed by igb_uio or vfio? >>>> In my environment, it seems max_vfs is created by igb_uio. >>>> But my NIC doesn't have VF, so behavior might be different in your >>>> environment. >>>> I guess "numa_node" should not be changed theoretically. >>>> >>>> If my guess is correct, how about replacing the following values? >>>> - driver >>>> - max_vfs >>>> - resource >>>> - (numa_node) >>>> Except for the above values, I guess other values shouldn't be changed even >>>> after the device is managed by igb_uio or vfio. >>>> >>>> Thanks, >>>> Tetsuya >>>> >>>>> Thanks, >>>>> Michael >>>>> >>>>>> Regards, >>>>>> >>>>>> Bernard. 
>>>>>> >>>>>> >>>>>> >>>>>> >>>>>>>> + free(dev); >>>>>>>> + return 0; >>>>>>>> } >>>>>>>> } >>>>>>>> TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff >>>>>>>> --git >>>>>>>> a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c >>>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c >>>>>>>> index e53f06b..1da3507 100644 >>>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c >>>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c >>>>>>>> @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev) >>>>>>>> TAILQ_FOREACH(uio_res, pci_res_list, next) { >>>>>>>> >>>>>>>> /* skip this element if it doesn't match our PCI >>>>>>>> address */ >>>>>>>> - if (memcmp(&uio_res->pci_addr, &dev->addr, >>>>>>>> sizeof(dev->addr))) >>>>>>>> + if (eal_compare_pci_addr(&uio_res->pci_addr, >>>>>>>> &dev->addr)) >>>>>>>> continue; >>>>>>>> >>>>>>>> for (i = 0; i != uio_res->nb_maps; i++) { >>>>>>> Acked-by: Michael Qiu <michael.qiu at intel.com> > >