Hi David, > -----Original Message----- > From: David Marchand <david.march...@redhat.com> > Sent: Monday, August 21, 2023 7:36 PM > To: dev@dpdk.org > Cc: tho...@monjalon.net; ferruh.yi...@amd.com; Xia, Chenbo > <chenbo....@intel.com>; nipun.gu...@amd.com; Richardson, Bruce > <bruce.richard...@intel.com>; Burakov, Anatoly <anatoly.bura...@intel.com>; > Jay Zhou <jianjay.z...@huawei.com>; McDaniel, Timothy > <timothy.mcdan...@intel.com>; Julien Aube <julien_d...@jaube.fr>; Rahul > Lakkireddy <rahul.lakkire...@chelsio.com>; Guo, Junfeng > <junfeng....@intel.com>; Jeroen de Borst <jeroe...@google.com>; Rushil > Gupta <rush...@google.com>; Joshua Washington <joshw...@google.com>; > Dongdong Liu <liudongdo...@huawei.com>; Yisen Zhuang > <yisen.zhu...@huawei.com>; Maxime Coquelin <maxime.coque...@redhat.com>; > Gaetan Rivet <gr...@u256.net> > Subject: [PATCH v2 04/15] bus/pci: find PCI capability > > Introduce two helpers so that drivers stop reinventing the wheel when it > comes to finding capabilities in a device PCI configuration space. > Use it in existing drivers. > > Note: > - base/ drivers code is left untouched, only some wrappers in cxgbe > are touched, > - bnx2x maintained a per device cache of capabilities, this code has been > reworked to only cache the capabilities used in this driver, > > Signed-off-by: David Marchand <david.march...@redhat.com> > Acked-by: Bruce Richardson <bruce.richard...@intel.com> > --- > Changes since v1: > - updated commitlog, > - separated VFIO changes for using standard PCI helper in a separate > patch, > - marked new experimental symbols with current version, > - reordered defines in rte_pci.h, > > --- > drivers/bus/pci/linux/pci_vfio.c | 74 ++++-------------- > drivers/bus/pci/pci_common.c | 45 +++++++++++ > drivers/bus/pci/rte_bus_pci.h | 31 ++++++++ > drivers/bus/pci/version.map | 4 + > drivers/crypto/virtio/virtio_pci.c | 57 +++++--------- > drivers/event/dlb2/pf/dlb2_main.c | 42 +--------- > drivers/net/bnx2x/bnx2x.c | 41 +++++----- > drivers/net/cxgbe/base/adapter.h | 28 +------ > drivers/net/gve/gve_ethdev.c | 46 ++--------- > drivers/net/gve/gve_ethdev.h | 4 - > drivers/net/hns3/hns3_ethdev_vf.c | 79 +++---------------- > drivers/net/virtio/virtio_pci.c | 121 +++++------------------------ > lib/pci/rte_pci.h | 11 +++ > 13 files changed, 186 insertions(+), 397 deletions(-) > > diff --git a/drivers/bus/pci/linux/pci_vfio.c > b/drivers/bus/pci/linux/pci_vfio.c > index 958f8b3b52..614ed5d696 100644 > --- a/drivers/bus/pci/linux/pci_vfio.c > +++ b/drivers/bus/pci/linux/pci_vfio.c > @@ -110,74 +110,34 @@ static int > pci_vfio_get_msix_bar(const struct rte_pci_device *dev, > struct pci_msix_table *msix_table) > { > - int ret; > - uint32_t reg; > - uint16_t flags; > - uint8_t cap_id, cap_offset; > + off_t cap_offset; > > - /* read PCI capability pointer from config space */ > - ret = rte_pci_read_config(dev, ®, sizeof(reg), > PCI_CAPABILITY_LIST); > - if (ret != sizeof(reg)) { > - RTE_LOG(ERR, EAL, > - "Cannot read capability pointer from PCI config > space!\n"); > + cap_offset = rte_pci_find_capability(dev, PCI_CAP_ID_MSIX);
I notice in some cases we use rte_pci_has_capability_list() to check first, then looking for specific cap, in other cases we don't use rte_pci_has_capability_list(). Since we define this API, should we always do the check? > + if (cap_offset < 0) > return -1; > - } > > - /* we need first byte */ > - cap_offset = reg & 0xFF; > + if (cap_offset != 0) { > + uint16_t flags; > + uint32_t reg; > > - while (cap_offset) { > - > - /* read PCI capability ID */ > - ret = rte_pci_read_config(dev, ®, sizeof(reg), cap_offset); > - if (ret != sizeof(reg)) { > + /* table offset resides in the next 4 bytes */ > + if (rte_pci_read_config(dev, ®, sizeof(reg), cap_offset + 4) > < 0) { > RTE_LOG(ERR, EAL, > - "Cannot read capability ID from PCI config > space!\n"); > + "Cannot read MSIX table from PCI config > space!\n"); > return -1; > } > > - /* we need first byte */ > - cap_id = reg & 0xFF; > - > - /* if we haven't reached MSI-X, check next capability */ > - if (cap_id != PCI_CAP_ID_MSIX) { > - ret = rte_pci_read_config(dev, ®, sizeof(reg), > cap_offset); > - if (ret != sizeof(reg)) { > - RTE_LOG(ERR, EAL, > - "Cannot read capability pointer from PCI > config space!\n"); > - return -1; > - } > - > - /* we need second byte */ > - cap_offset = (reg & 0xFF00) >> 8; > - > - continue; > + if (rte_pci_read_config(dev, &flags, sizeof(flags), cap_offset > + 2) < 0) { > + RTE_LOG(ERR, EAL, > + "Cannot read MSIX flags from PCI config > space!\n"); > + return -1; > } > - /* else, read table offset */ > - else { > - /* table offset resides in the next 4 bytes */ > - ret = rte_pci_read_config(dev, ®, sizeof(reg), > cap_offset + 4); > - if (ret != sizeof(reg)) { > - RTE_LOG(ERR, EAL, > - "Cannot read table offset from PCI > config > space!\n"); > - return -1; > - } > - > - ret = rte_pci_read_config(dev, &flags, sizeof(flags), > cap_offset + 2); > - if (ret != sizeof(flags)) { > - RTE_LOG(ERR, EAL, > - "Cannot read table flags from PCI config > space!\n"); > - return -1; > - } > - > - msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR; > - msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET; > - msix_table->size = > - 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE)); > > - return 0; > - } > + msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR; > + msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET; > + msix_table->size = 16 * (1 + (flags & > RTE_PCI_MSIX_FLAGS_QSIZE)); > } > + > return 0; > } > > diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c > index 382b0b8946..52272617eb 100644 > --- a/drivers/bus/pci/pci_common.c > +++ b/drivers/bus/pci/pci_common.c > @@ -813,6 +813,51 @@ rte_pci_get_iommu_class(void) > return iova_mode; > } > > +bool > +rte_pci_has_capability_list(const struct rte_pci_device *dev) > +{ > + uint16_t status; > + > + if (rte_pci_read_config(dev, &status, sizeof(status), > RTE_PCI_STATUS) != sizeof(status)) > + return false; > + > + return (status & RTE_PCI_STATUS_CAP_LIST) != 0; > +} > + > +off_t > +rte_pci_find_capability(const struct rte_pci_device *dev, uint8_t cap) > +{ > + off_t offset; > + uint8_t pos; > + int ttl; > + > + offset = RTE_PCI_CAPABILITY_LIST; > + ttl = (RTE_PCI_CFG_SPACE_SIZE - RTE_PCI_STD_HEADER_SIZEOF) / > RTE_PCI_CAP_SIZEOF; > + > + if (rte_pci_read_config(dev, &pos, sizeof(pos), offset) < 0) > + return -1; > + > + while (pos && ttl--) { > + uint16_t ent; > + uint8_t id; > + > + offset = pos; > + if (rte_pci_read_config(dev, &ent, sizeof(ent), offset) < 0) > + return -1; > + > + id = ent & 0xff; > + if (id == 0xff) > + break; > + > + if (id == cap) > + return offset; > + > + pos = (ent >> 8); > + } > + > + return 0; > +} > + > off_t > rte_pci_find_ext_capability(const struct rte_pci_device *dev, uint32_t > cap) > { > diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h > index 75d0030eae..1ed33dbf3d 100644 > --- a/drivers/bus/pci/rte_bus_pci.h > +++ b/drivers/bus/pci/rte_bus_pci.h > @@ -68,6 +68,37 @@ void rte_pci_unmap_device(struct rte_pci_device *dev); > */ > void rte_pci_dump(FILE *f); > > +/** > + * Check whether this device has a PCI capability list. > + * > + * @param dev > + * A pointer to rte_pci_device structure. > + * > + * @return > + * true/false > + */ > +__rte_experimental > +bool rte_pci_has_capability_list(const struct rte_pci_device *dev); > + > +/** > + * Find device's PCI capability. > + * > + * @param dev > + * A pointer to rte_pci_device structure. > + * > + * @param cap > + * Capability to be found, which can be any from > + * RTE_PCI_CAP_ID_*, defined in librte_pci. > + * > + * @return > + * > 0: The offset of the next matching capability structure > + * within the device's PCI configuration space. > + * < 0: An error in PCI config space read. > + * = 0: Device does not support it. > + */ > +__rte_experimental > +off_t rte_pci_find_capability(const struct rte_pci_device *dev, uint8_t > cap); > + > /** > * Find device's extended PCI capability. > * > diff --git a/drivers/bus/pci/version.map b/drivers/bus/pci/version.map > index a0000f7938..2674f30235 100644 > --- a/drivers/bus/pci/version.map > +++ b/drivers/bus/pci/version.map > @@ -25,6 +25,10 @@ EXPERIMENTAL { > # added in 23.07 > rte_pci_mmio_read; > rte_pci_mmio_write; > + > + # added in 23.11 > + rte_pci_find_capability; > + rte_pci_has_capability_list; > }; > > INTERNAL { > diff --git a/drivers/crypto/virtio/virtio_pci.c > b/drivers/crypto/virtio/virtio_pci.c > index 95a43c8801..abc52b4701 100644 > --- a/drivers/crypto/virtio/virtio_pci.c > +++ b/drivers/crypto/virtio/virtio_pci.c > @@ -19,7 +19,6 @@ > * we can't simply include that header here, as there is no such > * file for non-Linux platform. > */ > -#define PCI_CAPABILITY_LIST 0x34 > #define PCI_CAP_ID_VNDR 0x09 > #define PCI_CAP_ID_MSIX 0x11 > > @@ -343,8 +342,9 @@ get_cfg_addr(struct rte_pci_device *dev, struct > virtio_pci_cap *cap) > static int > virtio_read_caps(struct rte_pci_device *dev, struct virtio_crypto_hw *hw) > { > - uint8_t pos; > struct virtio_pci_cap cap; > + uint16_t flags; > + off_t pos; > int ret; > > if (rte_pci_map_device(dev)) { > @@ -352,44 +352,26 @@ virtio_read_caps(struct rte_pci_device *dev, struct > virtio_crypto_hw *hw) > return -1; > } > > - ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST); > - if (ret < 0) { > - VIRTIO_CRYPTO_INIT_LOG_DBG("failed to read pci capability > list"); > - return -1; > + /* > + * Transitional devices would also have this capability, > + * that's why we also check if msix is enabled. > + */ > + pos = rte_pci_find_capability(dev, PCI_CAP_ID_MSIX); > + if (pos > 0 && rte_pci_read_config(dev, &flags, sizeof(flags), > + pos + 2) == sizeof(flags)) { > + if (flags & PCI_MSIX_ENABLE) > + hw->use_msix = VIRTIO_MSIX_ENABLED; > + else > + hw->use_msix = VIRTIO_MSIX_DISABLED; > + } else { > + hw->use_msix = VIRTIO_MSIX_NONE; > } > > - while (pos) { > - ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos); > - if (ret < 0) { > - VIRTIO_CRYPTO_INIT_LOG_ERR( > - "failed to read pci cap at pos: %x", pos); > - break; > - } > - > - if (cap.cap_vndr == PCI_CAP_ID_MSIX) { > - /* Transitional devices would also have this capability, > - * that's why we also check if msix is enabled. > - * 1st byte is cap ID; 2nd byte is the position of next > - * cap; next two bytes are the flags. > - */ > - uint16_t flags = ((uint16_t *)&cap)[1]; > - > - if (flags & PCI_MSIX_ENABLE) > - hw->use_msix = VIRTIO_MSIX_ENABLED; > - else > - hw->use_msix = VIRTIO_MSIX_DISABLED; > - } > - > - if (cap.cap_vndr != PCI_CAP_ID_VNDR) { > - VIRTIO_CRYPTO_INIT_LOG_DBG( > - "[%2x] skipping non VNDR cap id: %02x", > - pos, cap.cap_vndr); > - goto next; > - } > - > + pos = rte_pci_find_capability(dev, PCI_CAP_ID_VNDR); The logic of vendor cap init seems incorrect. Virtio devices have multiple Vendor cap (different cfg type). But now the logic seems to only init the first one. > + if (pos > 0 && rte_pci_read_config(dev, &cap, sizeof(cap), pos) == > sizeof(cap)) { > VIRTIO_CRYPTO_INIT_LOG_DBG( > "[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u", > - pos, cap.cfg_type, cap.bar, cap.offset, cap.length); > + (unsigned int)pos, cap.cfg_type, cap.bar, cap.offset, > cap.length); > > switch (cap.cfg_type) { > case VIRTIO_PCI_CAP_COMMON_CFG: > @@ -411,9 +393,6 @@ virtio_read_caps(struct rte_pci_device *dev, struct > virtio_crypto_hw *hw) > hw->isr = get_cfg_addr(dev, &cap); > break; > } > - > -next: > - pos = cap.cap_next; > } ... > diff --git a/drivers/net/virtio/virtio_pci.c > b/drivers/net/virtio/virtio_pci.c > index 29eb739b04..9fd9db3e03 100644 > --- a/drivers/net/virtio/virtio_pci.c > +++ b/drivers/net/virtio/virtio_pci.c > @@ -20,7 +20,6 @@ > * we can't simply include that header here, as there is no such > * file for non-Linux platform. > */ > -#define PCI_CAPABILITY_LIST 0x34 > #define PCI_CAP_ID_VNDR 0x09 > #define PCI_CAP_ID_MSIX 0x11 > > @@ -38,46 +37,16 @@ struct virtio_pci_internal > virtio_pci_internal[RTE_MAX_ETHPORTS]; > static enum virtio_msix_status > vtpci_msix_detect(struct rte_pci_device *dev) > { > - uint8_t pos; > - int ret; > + uint16_t flags; > + off_t pos; > > - ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST); > - if (ret != 1) { > - PMD_INIT_LOG(DEBUG, > - "failed to read pci capability list, ret %d", ret); > - return VIRTIO_MSIX_NONE; > - } > - > - while (pos) { > - uint8_t cap[2]; > - > - ret = rte_pci_read_config(dev, cap, sizeof(cap), pos); > - if (ret != sizeof(cap)) { > - PMD_INIT_LOG(DEBUG, > - "failed to read pci cap at pos: %x ret %d", > - pos, ret); > - break; > - } > - > - if (cap[0] == PCI_CAP_ID_MSIX) { > - uint16_t flags; > - > - ret = rte_pci_read_config(dev, &flags, sizeof(flags), > - pos + sizeof(cap)); > - if (ret != sizeof(flags)) { > - PMD_INIT_LOG(DEBUG, > - "failed to read pci cap at pos:" > - " %x ret %d", pos + 2, ret); > - break; > - } > - > - if (flags & PCI_MSIX_ENABLE) > - return VIRTIO_MSIX_ENABLED; > - else > - return VIRTIO_MSIX_DISABLED; > - } > - > - pos = cap[1]; > + pos = rte_pci_find_capability(dev, PCI_CAP_ID_MSIX); > + if (pos > 0 && rte_pci_read_config(dev, &flags, sizeof(flags), > + pos + 2) == sizeof(flags)) { > + if (flags & PCI_MSIX_ENABLE) > + return VIRTIO_MSIX_ENABLED; > + else > + return VIRTIO_MSIX_DISABLED; > } > > return VIRTIO_MSIX_NONE; > @@ -623,8 +592,8 @@ static int > virtio_read_caps(struct rte_pci_device *pci_dev, struct virtio_hw *hw) > { > struct virtio_pci_dev *dev = virtio_pci_get_dev(hw); > - uint8_t pos; > struct virtio_pci_cap cap; > + off_t pos; > int ret; > > if (rte_pci_map_device(pci_dev)) { > @@ -632,72 +601,25 @@ virtio_read_caps(struct rte_pci_device *pci_dev, > struct virtio_hw *hw) > return -1; > } > > - ret = rte_pci_read_config(pci_dev, &pos, 1, PCI_CAPABILITY_LIST); > - if (ret != 1) { > - PMD_INIT_LOG(DEBUG, > - "failed to read pci capability list, ret %d", ret); > - return -1; > - } > - > - while (pos) { > - ret = rte_pci_read_config(pci_dev, &cap, 2, pos); > - if (ret != 2) { > - PMD_INIT_LOG(DEBUG, > - "failed to read pci cap at pos: %x ret %d", > - pos, ret); > - break; > - } > - > - if (cap.cap_vndr == PCI_CAP_ID_MSIX) { > - /* Transitional devices would also have this capability, > - * that's why we also check if msix is enabled. > - * 1st byte is cap ID; 2nd byte is the position of next > - * cap; next two bytes are the flags. > - */ > - uint16_t flags; > - > - ret = rte_pci_read_config(pci_dev, &flags, > sizeof(flags), > - pos + 2); > - if (ret != sizeof(flags)) { > - PMD_INIT_LOG(DEBUG, > - "failed to read pci cap at pos:" > - " %x ret %d", pos + 2, ret); > - break; > - } > - > - if (flags & PCI_MSIX_ENABLE) > - dev->msix_status = VIRTIO_MSIX_ENABLED; > - else > - dev->msix_status = VIRTIO_MSIX_DISABLED; > - } > - > - if (cap.cap_vndr != PCI_CAP_ID_VNDR) { > - PMD_INIT_LOG(DEBUG, > - "[%2x] skipping non VNDR cap id: %02x", > - pos, cap.cap_vndr); > - goto next; > - } > - > - ret = rte_pci_read_config(pci_dev, &cap, sizeof(cap), pos); > - if (ret != sizeof(cap)) { > - PMD_INIT_LOG(DEBUG, > - "failed to read pci cap at pos: %x ret %d", > - pos, ret); > - break; > - } > + /* > + * Transitional devices would also have this capability, > + * that's why we also check if msix is enabled. > + */ > + dev->msix_status = vtpci_msix_detect(pci_dev); > > + pos = rte_pci_find_capability(pci_dev, PCI_CAP_ID_VNDR); > + if (pos > 0 && rte_pci_read_config(pci_dev, &cap, sizeof(cap), pos) > == sizeof(cap)) { > PMD_INIT_LOG(DEBUG, > "[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u", > - pos, cap.cfg_type, cap.bar, cap.offset, cap.length); > + (unsigned int)pos, cap.cfg_type, cap.bar, cap.offset, > cap.length); > Same comment about the vendor cap. Thanks, Chenbo > switch (cap.cfg_type) { > case VIRTIO_PCI_CAP_COMMON_CFG: > dev->common_cfg = get_cfg_addr(pci_dev, &cap); > break; > case VIRTIO_PCI_CAP_NOTIFY_CFG: > - ret = rte_pci_read_config(pci_dev, > - &dev->notify_off_multiplier, > - 4, pos + sizeof(cap)); > + ret = rte_pci_read_config(pci_dev, &dev- > >notify_off_multiplier, > + 4, pos + sizeof(cap)); > if (ret != 4) > PMD_INIT_LOG(DEBUG, > "failed to read notify_off_multiplier, > ret %d", > @@ -712,9 +634,6 @@ virtio_read_caps(struct rte_pci_device *pci_dev, > struct virtio_hw *hw) > dev->isr = get_cfg_addr(pci_dev, &cap); > break; > } > - > -next: > - pos = cap.cap_next; > } > > if (dev->common_cfg == NULL || dev->notify_base == NULL || > diff --git a/lib/pci/rte_pci.h b/lib/pci/rte_pci.h > index aab761b918..49fd5b1d02 100644 > --- a/lib/pci/rte_pci.h > +++ b/lib/pci/rte_pci.h > @@ -28,13 +28,24 @@ extern "C" { > #define RTE_PCI_CFG_SPACE_SIZE 256 > #define RTE_PCI_CFG_SPACE_EXP_SIZE 4096 > > +#define RTE_PCI_STD_HEADER_SIZEOF 64 > + > +/* Standard register offsets in the PCI configuration space */ > #define RTE_PCI_VENDOR_ID 0x00 /* 16 bits */ > #define RTE_PCI_DEVICE_ID 0x02 /* 16 bits */ > #define RTE_PCI_COMMAND 0x04 /* 16 bits */ > +#define RTE_PCI_STATUS 0x06 /* 16 bits */ > +#define RTE_PCI_CAPABILITY_LIST 0x34 /* 32 bits */ > > /* PCI Command Register */ > #define RTE_PCI_COMMAND_MASTER 0x4 /* Bus Master Enable */ > > +/* PCI Status Register (RTE_PCI_STATUS) */ > +#define RTE_PCI_STATUS_CAP_LIST 0x10 /* Support Capability > List > */ > + > +/* Capability registers (RTE_PCI_CAPABILITY_LIST) */ > +#define RTE_PCI_CAP_SIZEOF 4 > + > /* PCI Express capability registers */ > #define RTE_PCI_EXP_DEVCTL 8 /* Device Control */ > > -- > 2.41.0