> Subject: Re: [Patch v6 01/18] net/mana: add basic driver, build environment > and doc > > > 在 2022/9/1 2:05, Long Li 写道: > >> Subject: Re: [Patch v6 01/18] net/mana: add basic driver, build > >> environment and doc > >> > >> > >> 在 2022/8/31 6:51, lon...@linuxonhyperv.com 写道: > >>> From: Long Li <lon...@microsoft.com> > >>> > >>> MANA is a PCI device. It uses IB verbs to access hardware through > >>> the kernel RDMA layer. This patch introduces build environment and > >>> basic device probe functions. > >>> > >>> Signed-off-by: Long Li <lon...@microsoft.com> > >>> --- > >>> Change log: > >>> v2: > >>> Fix typos. > >>> Make the driver build only on x86-64 and Linux. > >>> Remove unused header files. > >>> Change port definition to uint16_t or uint8_t (for IB). > >>> Use getline() in place of fgets() to read and truncate a line. > >>> v3: > >>> Add meson build check for required functions from RDMA direct verb > >>> header file > >>> v4: > >>> Remove extra "\n" in logging code. > >>> Use "r" in place of "rb" in fopen() to read text files. > >>> > >>> [snip] > >>> + > >>> +static int mana_pci_probe_mac(struct rte_pci_driver *pci_drv > >> __rte_unused, > >>> + struct rte_pci_device *pci_dev, > >>> + struct rte_ether_addr *mac_addr) { > >>> + struct ibv_device **ibv_list; > >>> + int ibv_idx; > >>> + struct ibv_context *ctx; > >>> + struct ibv_device_attr_ex dev_attr; > >>> + int num_devices; > >>> + int ret = 0; > >>> + uint8_t port; > >>> + struct mana_priv *priv = NULL; > >>> + struct rte_eth_dev *eth_dev = NULL; > >>> + bool found_port; > >>> + > >>> + ibv_list = ibv_get_device_list(&num_devices); > >>> + for (ibv_idx = 0; ibv_idx < num_devices; ibv_idx++) { > >>> + struct ibv_device *ibdev = ibv_list[ibv_idx]; > >>> + struct rte_pci_addr pci_addr; > >>> + > >>> + DRV_LOG(INFO, "Probe device name %s dev_name %s > >> ibdev_path %s", > >>> + ibdev->name, ibdev->dev_name, ibdev- > >>> ibdev_path); > >>> + > >>> + if (mana_ibv_device_to_pci_addr(ibdev, &pci_addr)) > >>> + continue; > >>> + > >>> + /* Ignore if this IB device is not this PCI device */ > >>> + if (pci_dev->addr.domain != pci_addr.domain || > >>> + pci_dev->addr.bus != pci_addr.bus || > >>> + pci_dev->addr.devid != pci_addr.devid || > >>> + pci_dev->addr.function != pci_addr.function) > >>> + continue; > >>> + > >>> + ctx = ibv_open_device(ibdev); > >>> + if (!ctx) { > >>> + DRV_LOG(ERR, "Failed to open IB device %s", > >>> + ibdev->name); > >>> + continue; > >>> + } > >>> + > >>> + ret = ibv_query_device_ex(ctx, NULL, &dev_attr); > >>> + DRV_LOG(INFO, "dev_attr.orig_attr.phys_port_cnt %u", > >>> + dev_attr.orig_attr.phys_port_cnt); > >>> + found_port = false; > >>> + > >>> + for (port = 1; port <= dev_attr.orig_attr.phys_port_cnt; > >>> + port++) { > >>> + struct ibv_parent_domain_init_attr attr = {}; > >>> + struct rte_ether_addr addr; > >>> + char address[64]; > >>> + char name[RTE_ETH_NAME_MAX_LEN]; > >>> + > >>> + ret = get_port_mac(ibdev, port, &addr); > >>> + if (ret) > >>> + continue; > >>> + > >>> + if (mac_addr && !rte_is_same_ether_addr(&addr, > >> mac_addr)) > >>> + continue; > >>> + > >>> + rte_ether_format_addr(address, sizeof(address), > >> &addr); > >>> + DRV_LOG(INFO, "device located port %u address > >> %s", > >>> + port, address); > >>> + found_port = true; > >>> + > >>> + priv = rte_zmalloc_socket(NULL, sizeof(*priv), > >>> + RTE_CACHE_LINE_SIZE, > >>> + SOCKET_ID_ANY); > >>> + if (!priv) { > >>> + ret = -ENOMEM; > >>> + goto failed; > >>> + } > >>> + > >>> + snprintf(name, sizeof(name), "%s_port%d", > >>> + pci_dev->device.name, port); > >>> + > >>> + if (rte_eal_process_type() == > >> RTE_PROC_SECONDARY) { > >>> + int fd; > >>> + > >>> + eth_dev = > >> rte_eth_dev_attach_secondary(name); > >>> + if (!eth_dev) { > >>> + DRV_LOG(ERR, "Can't attach to dev > >> %s", > >>> + name); > >>> + ret = -ENOMEM; > >>> + goto failed; > >>> + } > >>> + > >>> + eth_dev->device = &pci_dev->device; > >>> + eth_dev->dev_ops = &mana_dev_sec_ops; > >>> + ret = mana_proc_priv_init(eth_dev); > >>> + if (ret) > >>> + goto failed; > >>> + priv->process_priv = eth_dev- > >>> process_private; > >>> + > >>> + /* Get the IB FD from the primary process */ > >>> + fd = > >> mana_mp_req_verbs_cmd_fd(eth_dev); > >>> + if (fd < 0) { > >>> + DRV_LOG(ERR, "Failed to get FD %d", > >> fd); > >>> + ret = -ENODEV; > >>> + goto failed; > >>> + } > >>> + > >>> + ret = > >> mana_map_doorbell_secondary(eth_dev, fd); > >>> + if (ret) { > >>> + DRV_LOG(ERR, "Failed secondary > >> map %d", > >>> + fd); > >>> + goto failed; > >>> + } > >>> + > >>> + /* fd is no not used after mapping doorbell */ > >>> + close(fd); > >>> + > >>> + rte_spinlock_lock(&mana_shared_data- > >>> lock); > >>> + mana_shared_data->secondary_cnt++; > >>> + mana_local_data.secondary_cnt++; > >>> + rte_spinlock_unlock(&mana_shared_data- > >>> lock); > >>> + > >>> + rte_eth_copy_pci_info(eth_dev, pci_dev); > >>> + rte_eth_dev_probing_finish(eth_dev); > >>> + > >>> + /* Impossible to have more than one port > >>> + * matching a MAC address > >>> + */ > >>> + continue; > >>> + } > >>> + > >>> + eth_dev = rte_eth_dev_allocate(name); > >>> + if (!eth_dev) { > >>> + ret = -ENOMEM; > >>> + goto failed; > >>> + } > >>> + > >>> + eth_dev->data->mac_addrs = > >>> + rte_calloc("mana_mac", 1, > >>> + sizeof(struct rte_ether_addr), 0); > >>> + if (!eth_dev->data->mac_addrs) { > >>> + ret = -ENOMEM; > >>> + goto failed; > >>> + } > >>> + > >>> + rte_ether_addr_copy(&addr, eth_dev->data- > >>> mac_addrs); > >>> + > >>> + priv->ib_pd = ibv_alloc_pd(ctx); > >>> + if (!priv->ib_pd) { > >>> + DRV_LOG(ERR, "ibv_alloc_pd failed port %d", > >> port); > >>> + ret = -ENOMEM; > >>> + goto failed; > >>> + } > >>> + > >>> + /* Create a parent domain with the port number */ > >>> + attr.pd = priv->ib_pd; > >>> + attr.comp_mask = > >> IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT; > >>> + attr.pd_context = (void *)(uint64_t)port; > >>> + priv->ib_parent_pd = ibv_alloc_parent_domain(ctx, > >> &attr); > >>> + if (!priv->ib_parent_pd) { > >>> + DRV_LOG(ERR, > >>> + "ibv_alloc_parent_domain failed port > >> %d", > >>> + port); > >>> + ret = -ENOMEM; > >>> + goto failed; > >>> + } > >>> + > >>> + priv->ib_ctx = ctx; > >>> + priv->port_id = eth_dev->data->port_id; > >>> + priv->dev_port = port; > >>> + eth_dev->data->dev_private = priv; > >>> + priv->dev_data = eth_dev->data; > >>> + > >>> + priv->max_rx_queues = dev_attr.orig_attr.max_qp; > >>> + priv->max_tx_queues = dev_attr.orig_attr.max_qp; > >>> + > >>> + priv->max_rx_desc = > >>> + RTE_MIN(dev_attr.orig_attr.max_qp_wr, > >>> + dev_attr.orig_attr.max_cqe); > >>> + priv->max_tx_desc = > >>> + RTE_MIN(dev_attr.orig_attr.max_qp_wr, > >>> + dev_attr.orig_attr.max_cqe); > >>> + > >>> + priv->max_send_sge = dev_attr.orig_attr.max_sge; > >>> + priv->max_recv_sge = dev_attr.orig_attr.max_sge; > >>> + > >>> + priv->max_mr = dev_attr.orig_attr.max_mr; > >>> + priv->max_mr_size = > >> dev_attr.orig_attr.max_mr_size; > >>> + > >>> + DRV_LOG(INFO, "dev %s max queues %d desc %d > >> sge %d", > >>> + name, priv->max_rx_queues, priv- > >>> max_rx_desc, > >>> + priv->max_send_sge); > >>> + > >>> + rte_spinlock_lock(&mana_shared_data->lock); > >>> + mana_shared_data->primary_cnt++; > >>> + rte_spinlock_unlock(&mana_shared_data->lock); > >>> + > >>> + eth_dev->data->dev_flags |= > >> RTE_ETH_DEV_INTR_RMV; > >>> + > >>> + eth_dev->device = &pci_dev->device; > >>> + eth_dev->data->dev_flags |= > >>> + RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; > >>> + > >> Please do not use the temporary macro. Please review this patch: > >> > >> f30e69b41f94 ("ethdev: add device flag to bypass auto-filled queue > >> xstats") > >> > >> This patch requires that per queue statistics are filled in > >> .xstats_get() by PMD. > > Thanks for pointing this out. > > > > It seems some PMDs are still depending on this flag for xstats. > > > > MANA doesn't implement xstats_get() currently, this flag is useful. Is it > okay to keep using this flag before it's finally the time to remove it from > all > PMDs, or when MANA implements xstats? > Yes, your xstats doesn't implement now. Per queue stats should be filled in > xstats API, and the stats API cannot see per queue stats, so stats API in > driver > shouldn't fill it(suggest that delete it from patch 17/18). > > I guess this flag can be removed if PMD does not support xstats. > > > >
I don't understand your suggestion. An application can call rte_eth_stats_get() to get port stats, and this will call into stats_get() in the driver, as implemented in patch 17/18. When flag RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS is set, an application can also use rte_eth_xstats_get() to get port stats even the driver doesn't implement xstats_get().