[PATCH] net: mana: add msix index sharing between EQs
From: Konstantin Taranov

This patch allows assigning and polling more than one EQ on the same msix
index. It is achieved by introducing a list of attached EQs in each IRQ
context. This patch exports symbols for creating EQs from other MANA
kernel modules.

Signed-off-by: Konstantin Taranov
---
 .../net/ethernet/microsoft/mana/gdma_main.c   | 55 ++-
 .../net/ethernet/microsoft/mana/hw_channel.c  |  1 +
 drivers/net/ethernet/microsoft/mana/mana_en.c |  1 +
 include/net/mana/gdma.h                       |  4 +-
 4 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 6367de0..82a4534 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -401,6 +401,9 @@ static void mana_gd_process_eq_events(void *arg)
     u32 head, num_eqe;
     int i;
 
+    if (eq->id == INVALID_QUEUE_ID)
+        return;
+
     gc = eq->gdma_dev->gdma_context;
 
     num_eqe = eq->queue_size / GDMA_EQE_SIZE;
@@ -414,8 +417,12 @@ static void mana_gd_process_eq_events(void *arg)
         old_bits = (eq->head / num_eqe - 1) & GDMA_EQE_OWNER_MASK;
         /* No more entries */
-        if (owner_bits == old_bits)
+        if (owner_bits == old_bits) {
+            /* return here without ringing the doorbell */
+            if (i == 0)
+                return;
             break;
+        }
 
         new_bits = (eq->head / num_eqe) & GDMA_EQE_OWNER_MASK;
         if (owner_bits != new_bits) {
@@ -457,12 +464,16 @@ static int mana_gd_register_irq(struct gdma_queue *queue,
 
     spin_lock_irqsave(&r->lock, flags);
 
-    msi_index = find_first_zero_bit(r->map, r->size);
+    if (queue->eq.msix_index == INVALID_PCI_MSIX_INDEX)
+        queue->eq.msix_index = find_first_zero_bit(r->map, r->size);
+
+    msi_index = queue->eq.msix_index;
+
     if (msi_index >= r->size || msi_index >= gc->num_msix_usable) {
         err = -ENOSPC;
+        queue->eq.msix_index = INVALID_PCI_MSIX_INDEX;
     } else {
         bitmap_set(r->map, msi_index, 1);
-        queue->eq.msix_index = msi_index;
     }
 
     spin_unlock_irqrestore(&r->lock, flags);
@@ -476,9 +487,7 @@ static int mana_gd_register_irq(struct gdma_queue *queue,
 
     gic = &gc->irq_contexts[msi_index];
 
-    WARN_ON(gic->handler || gic->arg);
-
-    gic->arg = queue;
+    list_add_rcu(&queue->entry, &gic->eq_list);
 
     gic->handler = mana_gd_process_eq_events;
@@ -493,6 +502,7 @@ static void mana_gd_deregiser_irq(struct gdma_queue *queue)
     struct gdma_resource *r;
     unsigned int msix_index;
     unsigned long flags;
+    struct gdma_queue *eq;
 
     gc = gd->gdma_context;
     r = &gc->msix_resource;
@@ -502,12 +512,19 @@ static void mana_gd_deregiser_irq(struct gdma_queue *queue)
     if (WARN_ON(msix_index >= gc->num_msix_usable))
         return;
 
-    gic = &gc->irq_contexts[msix_index];
-    gic->handler = NULL;
-    gic->arg = NULL;
-
     spin_lock_irqsave(&r->lock, flags);
-    bitmap_clear(r->map, msix_index, 1);
+    gic = &gc->irq_contexts[msix_index];
+    list_for_each_entry_rcu(eq, &gic->eq_list, entry) {
+        if (queue == eq) {
+            list_del_rcu(&eq->entry);
+            synchronize_rcu();
+            break;
+        }
+    }
+    if (list_empty(&gic->eq_list)) {
+        gic->handler = NULL;
+        bitmap_clear(r->map, msix_index, 1);
+    }
     spin_unlock_irqrestore(&r->lock, flags);
 
     queue->eq.msix_index = INVALID_PCI_MSIX_INDEX;
@@ -587,7 +604,8 @@ static int mana_gd_create_eq(struct gdma_dev *gd,
     u32 log2_num_entries;
     int err;
 
-    queue->eq.msix_index = INVALID_PCI_MSIX_INDEX;
+    queue->eq.msix_index = spec->eq.msix_index;
+    queue->id = INVALID_QUEUE_ID;
 
     log2_num_entries = ilog2(queue->queue_size / GDMA_EQE_SIZE);
@@ -819,6 +837,7 @@ free_q:
     kfree(queue);
     return err;
 }
+EXPORT_SYMBOL_NS(mana_gd_create_mana_eq, NET_MANA);
 
 int mana_gd_create_mana_wq_cq(struct gdma_dev *gd,
                   const struct gdma_queue_spec *spec,
@@ -895,6 +914,7 @@ void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue)
     mana_gd_free_memory(gmi);
     kfree(queue);
 }
+EXPORT_SYMBOL_NS(mana_gd_destroy_queue, NET_MANA);
 
 int mana_gd_verify_vf_version(struct pci_dev *pdev)
 {
@@ -1217,9 +1237,14 @@ int mana_gd_poll_cq(struct gdma_q
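The consumer of gic->eq_list is the interrupt handler, which is not visible
in the hunks above. For context, this is the standard RCU reader pattern on
such a list: a lockless walk under rcu_read_lock(), paired with the
list_add_rcu()/list_del_rcu() plus synchronize_rcu() done by the writers in
this patch. A minimal sketch (function and field names follow this patch;
the actual handler in gdma_main.c may differ):

static irqreturn_t mana_gd_intr(int irq, void *arg)
{
    struct gdma_irq_context *gic = arg;
    struct gdma_queue *eq;

    /* Lockless reader: safe against a concurrent list_del_rcu() as
     * long as writers call synchronize_rcu() before freeing an EQ.
     */
    rcu_read_lock();
    list_for_each_entry_rcu(eq, &gic->eq_list, entry)
        mana_gd_process_eq_events(eq);
    rcu_read_unlock();

    return IRQ_HANDLED;
}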
[PATCH for-next v2] net: mana: add msix index sharing between EQs
From: Konstantin Taranov

This patch allows assigning and polling more than one EQ on the same msix
index. It is achieved by introducing a list of attached EQs in each IRQ
context. It also removes the existing msix_index map that tried to ensure
that there is only one EQ at each msix_index. This patch exports symbols
for creating EQs from other MANA kernel modules.

Signed-off-by: Konstantin Taranov
---
V1 -> V2: removed msix_index map and improved thread-safety of rcu lists
---
 .../net/ethernet/microsoft/mana/gdma_main.c   | 76 +--
 .../net/ethernet/microsoft/mana/hw_channel.c  |  1 +
 drivers/net/ethernet/microsoft/mana/mana_en.c |  1 +
 include/net/mana/gdma.h                       |  7 +-
 4 files changed, 43 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 6367de0..a686301 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -414,8 +414,12 @@ static void mana_gd_process_eq_events(void *arg)
         old_bits = (eq->head / num_eqe - 1) & GDMA_EQE_OWNER_MASK;
         /* No more entries */
-        if (owner_bits == old_bits)
+        if (owner_bits == old_bits) {
+            /* return here without ringing the doorbell */
+            if (i == 0)
+                return;
             break;
+        }
 
         new_bits = (eq->head / num_eqe) & GDMA_EQE_OWNER_MASK;
         if (owner_bits != new_bits) {
@@ -445,42 +449,29 @@ static int mana_gd_register_irq(struct gdma_queue *queue,
     struct gdma_dev *gd = queue->gdma_dev;
     struct gdma_irq_context *gic;
     struct gdma_context *gc;
-    struct gdma_resource *r;
     unsigned int msi_index;
     unsigned long flags;
     struct device *dev;
     int err = 0;
 
     gc = gd->gdma_context;
-    r = &gc->msix_resource;
     dev = gc->dev;
+    msi_index = spec->eq.msix_index;
 
-    spin_lock_irqsave(&r->lock, flags);
-
-    msi_index = find_first_zero_bit(r->map, r->size);
-    if (msi_index >= r->size || msi_index >= gc->num_msix_usable) {
+    if (msi_index >= gc->num_msix_usable) {
         err = -ENOSPC;
-    } else {
-        bitmap_set(r->map, msi_index, 1);
-        queue->eq.msix_index = msi_index;
-    }
-
-    spin_unlock_irqrestore(&r->lock, flags);
-
-    if (err) {
-        dev_err(dev, "Register IRQ err:%d, msi:%u rsize:%u, nMSI:%u",
-            err, msi_index, r->size, gc->num_msix_usable);
+        dev_err(dev, "Register IRQ err:%d, msi:%u nMSI:%u",
+            err, msi_index, gc->num_msix_usable);
         return err;
     }
 
+    queue->eq.msix_index = msi_index;
     gic = &gc->irq_contexts[msi_index];
 
-    WARN_ON(gic->handler || gic->arg);
-
-    gic->arg = queue;
-
-    gic->handler = mana_gd_process_eq_events;
+    spin_lock_irqsave(&gic->lock, flags);
+    list_add_rcu(&queue->entry, &gic->eq_list);
+    spin_unlock_irqrestore(&gic->lock, flags);
 
     return 0;
 }
@@ -490,12 +481,11 @@ static void mana_gd_deregiser_irq(struct gdma_queue *queue)
     struct gdma_dev *gd = queue->gdma_dev;
     struct gdma_irq_context *gic;
     struct gdma_context *gc;
-    struct gdma_resource *r;
     unsigned int msix_index;
     unsigned long flags;
+    struct gdma_queue *eq;
 
     gc = gd->gdma_context;
-    r = &gc->msix_resource;
 
     /* At most num_online_cpus() + 1 interrupts are used. */
     msix_index = queue->eq.msix_index;
@@ -503,14 +493,17 @@ static void mana_gd_deregiser_irq(struct gdma_queue *queue)
         return;
 
     gic = &gc->irq_contexts[msix_index];
-    gic->handler = NULL;
-    gic->arg = NULL;
-
-    spin_lock_irqsave(&r->lock, flags);
-    bitmap_clear(r->map, msix_index, 1);
-    spin_unlock_irqrestore(&r->lock, flags);
+    spin_lock_irqsave(&gic->lock, flags);
+    list_for_each_entry_rcu(eq, &gic->eq_list, entry) {
+        if (queue == eq) {
+            list_del_rcu(&eq->entry);
+            break;
+        }
+    }
+    spin_unlock_irqrestore(&gic->lock, flags);
 
     queue->eq.msix_index = INVALID_PCI_MSIX_INDEX;
+    synchronize_rcu();
 }
 
 int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq)
@@ -588,6 +581,7 @@ static int mana_gd_create_eq(struct gdma_dev *gd,
     int err;
 
     queue->eq.msix_index = INVALID_PCI_MSIX_INDEX;
+    queue->id = INVALID_QUEUE_ID;
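The "improved thread-safety of rcu lists" in the V1 -> V2 changelog refers
to the ordering in mana_gd_deregiser_irq(): V1 called synchronize_rcu()
while holding a spinlock, but synchronize_rcu() may sleep and therefore must
not run under a spinlock. V2 unlinks the entry under gic->lock and waits for
readers only after the lock is dropped. The writer-side pattern in
isolation, as a sketch:

/* Unlink under the lock; wait for readers only after dropping it. */
static void detach_eq_from_irq(struct gdma_irq_context *gic,
                   struct gdma_queue *eq)
{
    unsigned long flags;

    spin_lock_irqsave(&gic->lock, flags);
    list_del_rcu(&eq->entry);
    spin_unlock_irqrestore(&gic->lock, flags);

    /* May sleep, so it is legal only outside the spinlock; once it
     * returns, no rcu_read_lock() walker of gic->eq_list can still
     * hold a reference to eq.
     */
    synchronize_rcu();
}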
RE: [PATCH] net: mana: Fix possible double free in error handling path
> > -	kfree(madev);
> I think you can just avoid using add_fail and keep/retain rest of init_fail, idx_fail
> conditions in old way right?

I do agree with Sai. I think the patch can be just:

@@ -2797,7 +2797,8 @@ static int add_adev(struct gdma_dev *gd)
     ret = auxiliary_device_init(adev);
     if (ret)
         goto init_fail;
-
+    /* madev is owned by the auxiliary device */
+    madev = NULL;
     ret = auxiliary_device_add(adev);
     if (ret)
         goto add_fail;

- Konstantin
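Why the one-liner is sufficient: after a successful auxiliary_device_init(),
the device core owns madev and frees it through the adev_release() callback,
and kfree(NULL) is defined to be a no-op, so the shared unwind path can keep
its unconditional kfree(madev). A condensed sketch of the resulting flow (ID
allocation and naming elided; see add_adev() in mana_en.c for the real
function):

static int add_adev(struct gdma_dev *gd)
{
    struct mana_adev *madev;
    struct auxiliary_device *adev;
    int ret;

    madev = kzalloc(sizeof(*madev), GFP_KERNEL);
    if (!madev)
        return -ENOMEM;
    adev = &madev->adev;
    /* ... id allocation and adev name/dev setup elided ... */

    ret = auxiliary_device_init(adev);
    if (ret)
        goto init_fail;

    /* From here on adev_release() frees madev; forget our copy so the
     * kfree() in the unwind below becomes a harmless kfree(NULL).
     */
    madev = NULL;

    ret = auxiliary_device_add(adev);
    if (ret)
        goto add_fail;

    gd->adev = adev;
    return 0;

add_fail:
    auxiliary_device_uninit(adev);    /* triggers adev_release() */
init_fail:
    kfree(madev);                     /* NULL after successful init */
    return ret;
}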
RE: [PATCH v2] net: mana: Fix possible double free in error handling path
> When auxiliary_device_add() returns error and then calls
> auxiliary_device_uninit(), callback function adev_release calls kfree(madev).
> We shouldn't call kfree(madev) again in the error handling path. Set 'madev'
> to NULL.
>
> Signed-off-by: Ma Ke
> ---
> Changes in v2:
> - streamlined the patch according suggestions;
> - revised the description.
> ---
>  drivers/net/ethernet/microsoft/mana/mana_en.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c
> b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index d087cf954f75..608ad31a9702 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -2798,6 +2798,8 @@ static int add_adev(struct gdma_dev *gd)
>      if (ret)
>          goto init_fail;
>
> +    /* madev is owned by the auxiliary device */
> +    madev = NULL;
>      ret = auxiliary_device_add(adev);
>      if (ret)
>          goto add_fail;
> --
> 2.25.1

Reviewed-by: Konstantin Taranov
RE: [PATCH v2] net: mana: Fix possible double free in error handling path
> When auxiliary_device_add() returns error and then calls
> auxiliary_device_uninit(), callback function adev_release calls kfree(madev).
> We shouldn't call kfree(madev) again in the error handling path. Set 'madev'
> to NULL.
>
> Signed-off-by: Ma Ke
> ---
> Changes in v2:
> - streamlined the patch according suggestions;
> - revised the description.

The change is ok, but the commit message is missing a "Fixes" tag/line.

- Konstantin
[PATCH rdma-next 02/13] RDMA/mana_ib: implement get_dma_mr
From: Konstantin Taranov

Implement allocation of DMA-mapped memory regions.

Signed-off-by: Konstantin Taranov
Reviewed-by: Shiraz Saleem
---
 drivers/infiniband/hw/mana/device.c |  1 +
 drivers/infiniband/hw/mana/mr.c     | 36 +
 include/net/mana/gdma.h             |  5
 3 files changed, 42 insertions(+)

diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index 7ac0191..215dbce 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -32,6 +32,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
     .destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table,
     .destroy_wq = mana_ib_destroy_wq,
     .disassociate_ucontext = mana_ib_disassociate_ucontext,
+    .get_dma_mr = mana_ib_get_dma_mr,
     .get_link_layer = mana_ib_get_link_layer,
     .get_port_immutable = mana_ib_get_port_immutable,
     .mmap = mana_ib_mmap,
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
index 887b09d..3a047f8 100644
--- a/drivers/infiniband/hw/mana/mr.c
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -8,6 +8,8 @@
 #define VALID_MR_FLAGS \
     (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ)
 
+#define VALID_DMA_MR_FLAGS (IB_ACCESS_LOCAL_WRITE)
+
 static enum gdma_mr_access_flags
 mana_ib_verbs_to_gdma_access_flags(int access_flags)
 {
@@ -39,6 +41,8 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
     req.mr_type = mr_params->mr_type;
 
     switch (mr_params->mr_type) {
+    case GDMA_MR_TYPE_GPA:
+        break;
     case GDMA_MR_TYPE_GVA:
         req.gva.dma_region_handle = mr_params->gva.dma_region_handle;
         req.gva.virtual_address = mr_params->gva.virtual_address;
@@ -169,6 +173,38 @@ err_free:
     return ERR_PTR(err);
 }
 
+struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags)
+{
+    struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+    struct gdma_create_mr_params mr_params = {};
+    struct ib_device *ibdev = ibpd->device;
+    struct mana_ib_dev *dev;
+    struct mana_ib_mr *mr;
+    int err;
+
+    dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+    if (access_flags & ~VALID_DMA_MR_FLAGS)
+        return ERR_PTR(-EINVAL);
+
+    mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+    if (!mr)
+        return ERR_PTR(-ENOMEM);
+
+    mr_params.pd_handle = pd->pd_handle;
+    mr_params.mr_type = GDMA_MR_TYPE_GPA;
+
+    err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+    if (err)
+        goto err_free;
+
+    return &mr->ibmr;
+
+err_free:
+    kfree(mr);
+    return ERR_PTR(err);
+}
+
 int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 {
     struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr);
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 03e1b25..a94b04e 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -801,6 +801,11 @@ struct gdma_destory_pd_resp {
 };/* HW DATA */
 
 enum gdma_mr_type {
+    /*
+     * Guest Physical Address - MRs of this type allow access
+     * to any DMA-mapped memory using bus-logical address
+     */
+    GDMA_MR_TYPE_GPA = 1,
     /* Guest Virtual Address - MRs of this type allow access
      * to memory mapped by PTEs associated with this MR using a virtual
      * address that is set up in the MST
--
2.43.0
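In-kernel consumers never call .get_dma_mr directly: the verbs core invokes
it from ib_alloc_pd() when the device does not advertise
IB_DEVICE_LOCAL_DMA_LKEY, passing IB_ACCESS_LOCAL_WRITE for a plain PD —
which is why VALID_DMA_MR_FLAGS admits only that bit (an
IB_PD_UNSAFE_GLOBAL_RKEY request would add remote-access bits and be
rejected with -EINVAL). A hedged sketch of the consumer-side effect:

/* ULP view: the DMA MR is created implicitly by the verbs core. */
static struct ib_pd *example_alloc_pd(struct ib_device *ibdev)
{
    struct ib_pd *pd = ib_alloc_pd(ibdev, 0);

    if (IS_ERR(pd))
        return pd;

    /* If ibdev lacks IB_DEVICE_LOCAL_DMA_LKEY, the core has already
     * called ibdev->ops.get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
     * pd->local_dma_lkey now refers to the GPA MR created above and
     * can be placed in SGEs for any DMA-mapped kernel buffer.
     */
    return pd;
}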
[PATCH rdma-next 00/13] RDMA/mana_ib: Enable CM for mana_ib
From: Konstantin Taranov

This patch series enables GSI QPs and CM on mana_ib.

Konstantin Taranov (13):
  RDMA/mana_ib: Allow registration of DMA-mapped memory in PDs
  RDMA/mana_ib: implement get_dma_mr
  RDMA/mana_ib: helpers to allocate kernel queues
  RDMA/mana_ib: create kernel-level CQs
  RDMA/mana_ib: Create and destroy UD/GSI QP
  RDMA/mana_ib: UD/GSI QP creation for kernel
  RDMA/mana_ib: create/destroy AH
  net/mana: fix warning in the writer of client oob
  RDMA/mana_ib: UD/GSI work requests
  RDMA/mana_ib: implement req_notify_cq
  RDMA/mana_ib: extend mana QP table
  RDMA/mana_ib: polling of CQs for GSI/UD
  RDMA/mana_ib: indicate CM support

 drivers/infiniband/hw/mana/Makefile           |   2 +-
 drivers/infiniband/hw/mana/ah.c               |  58 +
 drivers/infiniband/hw/mana/cq.c               | 227 ++--
 drivers/infiniband/hw/mana/device.c           |  18 +-
 drivers/infiniband/hw/mana/main.c             |  95 ++-
 drivers/infiniband/hw/mana/mana_ib.h          | 157 ++-
 drivers/infiniband/hw/mana/mr.c               |  36 +++
 drivers/infiniband/hw/mana/qp.c               | 245 +-
 drivers/infiniband/hw/mana/shadow_queue.h     | 115
 drivers/infiniband/hw/mana/wr.c               | 168
 .../net/ethernet/microsoft/mana/gdma_main.c   |   7 +-
 include/net/mana/gdma.h                       |   6 +
 12 files changed, 1096 insertions(+), 38 deletions(-)
 create mode 100644 drivers/infiniband/hw/mana/ah.c
 create mode 100644 drivers/infiniband/hw/mana/shadow_queue.h
 create mode 100644 drivers/infiniband/hw/mana/wr.c
--
2.43.0
[PATCH rdma-next 07/13] RDMA/mana_ib: create/destroy AH
From: Konstantin Taranov

Implement create and destroy AH for kernel.

In mana_ib, AV is passed as an sge in WQE.
Allocate DMA memory and write an AV there.

Signed-off-by: Konstantin Taranov
Reviewed-by: Shiraz Saleem
---
 drivers/infiniband/hw/mana/Makefile  |  2 +-
 drivers/infiniband/hw/mana/ah.c      | 58
 drivers/infiniband/hw/mana/device.c  | 13 ++-
 drivers/infiniband/hw/mana/mana_ib.h | 30 ++
 4 files changed, 101 insertions(+), 2 deletions(-)
 create mode 100644 drivers/infiniband/hw/mana/ah.c

diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile
index 88655fe..6e56f77 100644
--- a/drivers/infiniband/hw/mana/Makefile
+++ b/drivers/infiniband/hw/mana/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o
 
-mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o
+mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o
diff --git a/drivers/infiniband/hw/mana/ah.c b/drivers/infiniband/hw/mana/ah.c
new file mode 100644
index 000..f56952e
--- /dev/null
+++ b/drivers/infiniband/hw/mana/ah.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr,
+              struct ib_udata *udata)
+{
+    struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
+    struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
+    struct rdma_ah_attr *ah_attr = attr->ah_attr;
+    const struct ib_global_route *grh;
+    enum rdma_network_type ntype;
+
+    if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE ||
+        !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
+        return -EINVAL;
+
+    if (udata)
+        return -EINVAL;
+
+    ah->av = dma_pool_zalloc(mdev->av_pool, GFP_ATOMIC, &ah->dma_handle);
+    if (!ah->av)
+        return -ENOMEM;
+
+    grh = rdma_ah_read_grh(ah_attr);
+    ntype = rdma_gid_attr_network_type(grh->sgid_attr);
+
+    copy_in_reverse(ah->av->dest_mac, ah_attr->roce.dmac, ETH_ALEN);
+    ah->av->udp_src_port = rdma_flow_label_to_udp_sport(grh->flow_label);
+    ah->av->hop_limit = grh->hop_limit;
+    ah->av->dscp = (grh->traffic_class >> 2) & 0x3f;
+    ah->av->is_ipv6 = (ntype == RDMA_NETWORK_IPV6);
+
+    if (ah->av->is_ipv6) {
+        copy_in_reverse(ah->av->dest_ip, grh->dgid.raw, 16);
+        copy_in_reverse(ah->av->src_ip, grh->sgid_attr->gid.raw, 16);
+    } else {
+        ah->av->dest_ip[10] = 0xFF;
+        ah->av->dest_ip[11] = 0xFF;
+        copy_in_reverse(&ah->av->dest_ip[12], &grh->dgid.raw[12], 4);
+        copy_in_reverse(&ah->av->src_ip[12], &grh->sgid_attr->gid.raw[12], 4);
+    }
+
+    return 0;
+}
+
+int mana_ib_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+    struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
+    struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
+
+    dma_pool_free(mdev->av_pool, ah->av, ah->dma_handle);
+
+    return 0;
+}
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index 215dbce..d534ef1 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -19,6 +19,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
     .add_gid = mana_ib_gd_add_gid,
     .alloc_pd = mana_ib_alloc_pd,
     .alloc_ucontext = mana_ib_alloc_ucontext,
+    .create_ah = mana_ib_create_ah,
     .create_cq = mana_ib_create_cq,
     .create_qp = mana_ib_create_qp,
     .create_rwq_ind_table = mana_ib_create_rwq_ind_table,
@@ -27,6 +28,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
     .dealloc_ucontext = mana_ib_dealloc_ucontext,
     .del_gid = mana_ib_gd_del_gid,
     .dereg_mr = mana_ib_dereg_mr,
+    .destroy_ah = mana_ib_destroy_ah,
     .destroy_cq = mana_ib_destroy_cq,
     .destroy_qp = mana_ib_destroy_qp,
     .destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table,
@@ -44,6 +46,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
     .query_port = mana_ib_query_port,
     .reg_user_mr = mana_ib_reg_user_mr,
 
+    INIT_RDMA_OBJ_SIZE(ib_ah, mana_ib_ah, ibah),
     INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq),
     INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd),
     INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp),
@@ -135,15 +138,22 @@ static int mana_ib_probe(struct auxiliary_device *adev,
         goto destroy_rnic;
     }
 
+    dev->av_pool = dma_pool_create("mana_ib_a
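copy_in_reverse() is a pre-existing mana_ib helper, not part of this diff:
the AV fields are laid out byte-reversed for the HW, and the IPv4 branch
above builds an IPv4-mapped IPv6 address (::ffff:a.b.c.d) before reversing
the low bytes. A standalone illustration of the assumed semantics
(dst[i] = src[size - 1 - i]):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Presumed semantics of the mana_ib helper. */
static void copy_in_reverse(uint8_t *dst, const uint8_t *src, size_t size)
{
    size_t i;

    for (i = 0; i < size; i++)
        dst[i] = src[size - 1 - i];
}

int main(void)
{
    const uint8_t dmac[6] = { 0x00, 0x15, 0x5d, 0x01, 0x02, 0x03 };
    uint8_t av_mac[6];
    size_t i;

    copy_in_reverse(av_mac, dmac, sizeof(dmac));
    for (i = 0; i < 6; i++)
        printf("%02x%s", av_mac[i], i < 5 ? ":" : "\n");
    /* prints 03:02:01:5d:15:00 */
    return 0;
}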
[PATCH rdma-next 03/13] RDMA/mana_ib: helpers to allocate kernel queues
From: Konstantin Taranov

Introduce helpers to allocate queues for kernel-level use.

Signed-off-by: Konstantin Taranov
Reviewed-by: Shiraz Saleem
---
 drivers/infiniband/hw/mana/main.c             | 23 +++
 drivers/infiniband/hw/mana/mana_ib.h          |  3 +++
 .../net/ethernet/microsoft/mana/gdma_main.c   |  1 +
 3 files changed, 27 insertions(+)

diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 45b251b..f2f6bb3 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -240,6 +240,27 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
         ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
 }
 
+int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
+                struct mana_ib_queue *queue)
+{
+    struct gdma_context *gc = mdev_to_gc(mdev);
+    struct gdma_queue_spec spec = {};
+    int err;
+
+    queue->id = INVALID_QUEUE_ID;
+    queue->gdma_region = GDMA_INVALID_DMA_REGION;
+    spec.type = type;
+    spec.monitor_avl_buf = false;
+    spec.queue_size = size;
+    err = mana_gd_create_mana_wq_cq(&gc->mana_ib, &spec, &queue->kmem);
+    if (err)
+        return err;
+    /* take ownership into mana_ib from mana */
+    queue->gdma_region = queue->kmem->mem_info.dma_region_handle;
+    queue->kmem->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
+    return 0;
+}
+
 int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
              struct mana_ib_queue *queue)
 {
@@ -279,6 +300,8 @@ void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue)
      */
     mana_ib_gd_destroy_dma_region(mdev, queue->gdma_region);
     ib_umem_release(queue->umem);
+    if (queue->kmem)
+        mana_gd_destroy_queue(mdev_to_gc(mdev), queue->kmem);
 }
 
 static int
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index b53a5b4..79ebd95 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -52,6 +52,7 @@ struct mana_ib_adapter_caps {
 
 struct mana_ib_queue {
     struct ib_umem *umem;
+    struct gdma_queue *kmem;
     u64 gdma_region;
    u64 id;
 };
@@ -388,6 +389,8 @@ int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
 int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
                   mana_handle_t gdma_region);
 
+int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
+                struct mana_ib_queue *queue);
 int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
              struct mana_ib_queue *queue);
 void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue);
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index e97af7a..3cb0543 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -867,6 +867,7 @@ free_q:
     kfree(queue);
     return err;
 }
+EXPORT_SYMBOL_NS(mana_gd_create_mana_wq_cq, NET_MANA);
 
 void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue)
 {
--
2.43.0
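The "take ownership" comment is the key detail: mana_gd_create_mana_wq_cq()
leaves the DMA region handle inside kmem->mem_info, and the helper moves it
into queue->gdma_region so mana_ib_destroy_queue() has a single cleanup path
for both user and kernel queues. An illustrative usage sketch (sizes are the
caller's choice; error handling abbreviated), following how later patches in
the series consume it:

static int example_kernel_cq_buffer(struct mana_ib_dev *mdev,
                    struct mana_ib_queue *queue)
{
    /* Ask for a 4-page CQ ring; on success the helper fills both
     * queue->kmem and queue->gdma_region.
     */
    int err = mana_ib_create_kernel_queue(mdev, 4 * PAGE_SIZE, GDMA_CQ,
                          queue);
    if (err)
        return err;

    /* queue->gdma_region is now owned by mana_ib: it is typically
     * handed to a CREATE_CQ/CREATE_QP HW request (which then resets
     * the field to GDMA_INVALID_DMA_REGION); otherwise destroy
     * releases both the region and the kernel queue memory.
     */
    mana_ib_destroy_queue(mdev, queue);
    return 0;
}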
[PATCH rdma-next 01/13] RDMA/mana_ib: Allow registration of DMA-mapped memory in PDs
From: Konstantin Taranov

Allow the HW to register DMA-mapped memory for kernel-level PDs.

Signed-off-by: Konstantin Taranov
Reviewed-by: Shiraz Saleem
---
 drivers/infiniband/hw/mana/main.c | 3 +++
 include/net/mana/gdma.h           | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 67c2d43..45b251b 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -82,6 +82,9 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
     mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
                  sizeof(resp));
 
+    if (!udata)
+        flags |= GDMA_PD_FLAG_ALLOW_GPA_MR;
+
     req.flags = flags;
     err = mana_gd_send_request(gc, sizeof(req), &req,
                    sizeof(resp), &resp);
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 90f5665..03e1b25 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -775,6 +775,7 @@ struct gdma_destroy_dma_region_req {
 
 enum gdma_pd_flags {
     GDMA_PD_FLAG_INVALID = 0,
+    GDMA_PD_FLAG_ALLOW_GPA_MR = 1,
 };
 
 struct gdma_create_pd_req {
--
2.43.0
[PATCH rdma-next 08/13] net/mana: fix warning in the writer of client oob
From: Konstantin Taranov

Do not warn on missing pad_data when oob is in sgl.

Signed-off-by: Konstantin Taranov
Reviewed-by: Shiraz Saleem
---
 drivers/net/ethernet/microsoft/mana/gdma_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 3cb0543..a8a9cd7 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -1042,7 +1042,7 @@ static u32 mana_gd_write_client_oob(const struct gdma_wqe_request *wqe_req,
     header->inline_oob_size_div4 = client_oob_size / sizeof(u32);
 
     if (oob_in_sgl) {
-        WARN_ON_ONCE(!pad_data || wqe_req->num_sge < 2);
+        WARN_ON_ONCE(wqe_req->num_sge < 2);
 
         header->client_oob_in_sgl = 1;
--
2.43.0
[PATCH rdma-next 05/13] RDMA/mana_ib: Create and destroy UD/GSI QP
From: Konstantin Taranov

Implement HW requests to create and destroy UD/GSI QPs.
An UD/GSI QP has send and receive queues.

Signed-off-by: Konstantin Taranov
Reviewed-by: Shiraz Saleem
---
 drivers/infiniband/hw/mana/main.c    | 58
 drivers/infiniband/hw/mana/mana_ib.h | 49 +++
 2 files changed, 107 insertions(+)

diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index f2f6bb3..b0c55cb 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -1013,3 +1013,61 @@ int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
     }
     return 0;
 }
+
+int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+                struct ib_qp_init_attr *attr, u32 doorbell, u32 type)
+{
+    struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+    struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+    struct mana_ib_pd *pd = container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd);
+    struct gdma_context *gc = mdev_to_gc(mdev);
+    struct mana_rnic_create_udqp_resp resp = {};
+    struct mana_rnic_create_udqp_req req = {};
+    int err, i;
+
+    mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_UD_QP, sizeof(req), sizeof(resp));
+    req.hdr.dev_id = gc->mana_ib.dev_id;
+    req.adapter = mdev->adapter_handle;
+    req.pd_handle = pd->pd_handle;
+    req.send_cq_handle = send_cq->cq_handle;
+    req.recv_cq_handle = recv_cq->cq_handle;
+    for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++)
+        req.dma_region[i] = qp->ud_qp.queues[i].gdma_region;
+    req.doorbell_page = doorbell;
+    req.max_send_wr = attr->cap.max_send_wr;
+    req.max_recv_wr = attr->cap.max_recv_wr;
+    req.max_send_sge = attr->cap.max_send_sge;
+    req.max_recv_sge = attr->cap.max_recv_sge;
+    req.qp_type = type;
+    err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+    if (err) {
+        ibdev_err(&mdev->ib_dev, "Failed to create ud qp err %d", err);
+        return err;
+    }
+    qp->qp_handle = resp.qp_handle;
+    for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++) {
+        qp->ud_qp.queues[i].id = resp.queue_ids[i];
+        /* The GDMA regions are now owned by the RNIC QP handle */
+        qp->ud_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+    }
+    return 0;
+}
+
+int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+    struct mana_rnic_destroy_udqp_resp resp = {0};
+    struct mana_rnic_destroy_udqp_req req = {0};
+    struct gdma_context *gc = mdev_to_gc(mdev);
+    int err;
+
+    mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_UD_QP, sizeof(req), sizeof(resp));
+    req.hdr.dev_id = gc->mana_ib.dev_id;
+    req.adapter = mdev->adapter_handle;
+    req.qp_handle = qp->qp_handle;
+    err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+    if (err) {
+        ibdev_err(&mdev->ib_dev, "Failed to destroy ud qp err %d", err);
+        return err;
+    }
+    return 0;
+}
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 79ebd95..5e470f1 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -115,6 +115,17 @@ struct mana_ib_rc_qp {
     struct mana_ib_queue queues[MANA_RC_QUEUE_TYPE_MAX];
 };
 
+enum mana_ud_queue_type {
+    MANA_UD_SEND_QUEUE = 0,
+    MANA_UD_RECV_QUEUE,
+    MANA_UD_QUEUE_TYPE_MAX,
+};
+
+struct mana_ib_ud_qp {
+    struct mana_ib_queue queues[MANA_UD_QUEUE_TYPE_MAX];
+    u32 sq_psn;
+};
+
 struct mana_ib_qp {
     struct ib_qp ibqp;
 
@@ -122,6 +133,7 @@ struct mana_ib_qp {
     union {
         struct mana_ib_queue raw_sq;
         struct mana_ib_rc_qp rc_qp;
+        struct mana_ib_ud_qp ud_qp;
     };
 
     /* The port on the IB device, starting with 1 */
@@ -146,6 +158,8 @@ enum mana_ib_command_code {
     MANA_IB_DESTROY_ADAPTER = 0x30003,
     MANA_IB_CONFIG_IP_ADDR  = 0x30004,
     MANA_IB_CONFIG_MAC_ADDR = 0x30005,
+    MANA_IB_CREATE_UD_QP    = 0x30006,
+    MANA_IB_DESTROY_UD_QP   = 0x30007,
     MANA_IB_CREATE_CQ       = 0x30008,
     MANA_IB_DESTROY_CQ      = 0x30009,
     MANA_IB_CREATE_RC_QP    = 0x3000a,
@@ -297,6 +311,37 @@ struct mana_rnic_destroy_rc_qp_resp {
     struct gdma_resp_hdr hdr;
 }; /* HW Data */
 
+struct mana_rnic_create_udqp_req {
+    struct gdma_req_hdr hdr;
+    mana_handle_t adapter;
+    mana_handle_t pd_handle;
+    mana_handle_t send_cq_handle;
+    mana_handle_t recv_cq_handle
[PATCH rdma-next 11/13] RDMA/mana_ib: extend mana QP table
From: Konstantin Taranov

Enable mana QP table to store UD/GSI QPs.
For send queues, set the most significant bit to one,
as send and receive WQs can have the same ID in mana.

Signed-off-by: Konstantin Taranov
Reviewed-by: Shiraz Saleem
---
 drivers/infiniband/hw/mana/main.c    |  2 +-
 drivers/infiniband/hw/mana/mana_ib.h |  8 ++-
 drivers/infiniband/hw/mana/qp.c      | 78 ++-
 3 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index b0c55cb..114e391 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -704,7 +704,7 @@ mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event)
     switch (event->type) {
     case GDMA_EQE_RNIC_QP_FATAL:
         qpn = event->details[0];
-        qp = mana_get_qp_ref(mdev, qpn);
+        qp = mana_get_qp_ref(mdev, qpn, false);
         if (!qp)
             break;
         if (qp->ibqp.event_handler) {
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index bd34ad6..5e4ca55 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -23,6 +23,9 @@
 /* MANA doesn't have any limit for MR size */
 #define MANA_IB_MAX_MR_SIZE    U64_MAX
 
+/* Send queue ID mask */
+#define MANA_SENDQ_MASK    BIT(31)
+
 /*
  * The hardware limit of number of MRs is greater than maximum number of MRs
  * that can possibly represent in 24 bits
@@ -438,11 +441,14 @@ static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
 }
 
 static inline struct mana_ib_qp *mana_get_qp_ref(struct mana_ib_dev *mdev,
-                         uint32_t qid)
+                         u32 qid, bool is_sq)
 {
     struct mana_ib_qp *qp;
     unsigned long flag;
 
+    if (is_sq)
+        qid |= MANA_SENDQ_MASK;
+
     xa_lock_irqsave(&mdev->qp_table_wq, flag);
     qp = xa_load(&mdev->qp_table_wq, qid);
     if (qp)
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 051ea03..2528046 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -444,18 +444,82 @@ static enum gdma_queue_type mana_ib_queue_type(struct ib_qp_init_attr *attr, u32
     return type;
 }
 
+static int mana_table_store_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+    return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
+                 GFP_KERNEL);
+}
+
+static void mana_table_remove_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+    xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
+}
+
+static int mana_table_store_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+    u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
+    u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+    int err;
+
+    err = xa_insert_irq(&mdev->qp_table_wq, qids, qp, GFP_KERNEL);
+    if (err)
+        return err;
+
+    err = xa_insert_irq(&mdev->qp_table_wq, qidr, qp, GFP_KERNEL);
+    if (err)
+        goto remove_sq;
+
+    return 0;
+
+remove_sq:
+    xa_erase_irq(&mdev->qp_table_wq, qids);
+    return err;
+}
+
+static void mana_table_remove_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+    u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
+    u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+
+    xa_erase_irq(&mdev->qp_table_wq, qids);
+    xa_erase_irq(&mdev->qp_table_wq, qidr);
+}
+
 static int mana_table_store_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
 {
     refcount_set(&qp->refcount, 1);
     init_completion(&qp->free);
-    return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
-                 GFP_KERNEL);
+
+    switch (qp->ibqp.qp_type) {
+    case IB_QPT_RC:
+        return mana_table_store_rc_qp(mdev, qp);
+    case IB_QPT_UD:
+    case IB_QPT_GSI:
+        return mana_table_store_ud_qp(mdev, qp);
+    default:
+        ibdev_dbg(&mdev->ib_dev, "Unknown QP type for storing in mana table, %d\n",
+              qp->ibqp.qp_type);
+    }
+
+    return -EINVAL;
 }
 
 static void mana_table_remove_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
 {
-    xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
+    switch (qp->ibqp.qp_type) {
+    case IB_QPT_RC:
+        mana_table_remove_rc_qp(mdev, qp);
+        break;
+    case IB_QPT_UD:
+    case IB_QPT_GSI:
+        mana_table_remove_ud_qp(mdev, qp);
+
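Why the bit-31 trick suffices: the single qp_table_wq xarray can hold both
queues of a UD QP because the two key spaces are disjoint — WQ IDs stay
below bit 31 (an assumption the driver relies on), and send queues are
tagged with BIT(31). A self-contained illustration:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define MANA_SENDQ_MASK (1u << 31)    /* BIT(31) in the patch */

static uint32_t qp_table_key(uint32_t wq_id, bool is_sq)
{
    return is_sq ? (wq_id | MANA_SENDQ_MASK) : wq_id;
}

int main(void)
{
    /* The same hardware WQ ID used for both an SQ and an RQ still
     * maps to two distinct table keys.
     */
    uint32_t id = 0x1234;

    printf("send key: 0x%08x\n", qp_table_key(id, true));   /* 0x80001234 */
    printf("recv key: 0x%08x\n", qp_table_key(id, false));  /* 0x00001234 */
    return 0;
}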
[PATCH rdma-next 10/13] RDMA/mana_ib: implement req_notify_cq
From: Konstantin Taranov

Arm a CQ when req_notify_cq is called.

Signed-off-by: Konstantin Taranov
Reviewed-by: Shiraz Saleem
---
 drivers/infiniband/hw/mana/cq.c                 | 12
 drivers/infiniband/hw/mana/device.c             |  1 +
 drivers/infiniband/hw/mana/mana_ib.h            |  2 ++
 drivers/net/ethernet/microsoft/mana/gdma_main.c |  1 +
 4 files changed, 16 insertions(+)

diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index d26d82d..82f1462 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -168,3 +168,15 @@ void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
     kfree(gc->cq_table[cq->queue.id]);
     gc->cq_table[cq->queue.id] = NULL;
 }
+
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+    struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+    struct gdma_queue *gdma_cq = cq->queue.kmem;
+
+    if (!gdma_cq)
+        return -EINVAL;
+
+    mana_gd_ring_cq(gdma_cq, SET_ARM_BIT);
+    return 0;
+}
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index 1da86c3..63e12c3 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -47,6 +47,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
     .query_pkey = mana_ib_query_pkey,
     .query_port = mana_ib_query_port,
     .reg_user_mr = mana_ib_reg_user_mr,
+    .req_notify_cq = mana_ib_arm_cq,
 
     INIT_RDMA_OBJ_SIZE(ib_ah, mana_ib_ah, ibah),
     INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq),
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 6265c39..bd34ad6 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -595,4 +595,6 @@ int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
               const struct ib_recv_wr **bad_wr);
 int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
               const struct ib_send_wr **bad_wr);
+
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
 #endif
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 409e4e8..823f7e7 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -344,6 +344,7 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
     mana_gd_ring_doorbell(gc, cq->gdma_dev->doorbell, cq->type, cq->id,
                   head, arm_bit);
 }
+EXPORT_SYMBOL_NS(mana_gd_ring_cq, NET_MANA);
 
 static void mana_gd_process_eqe(struct gdma_queue *eq)
 {
--
2.43.0
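mana_ib_arm_cq() only rings the doorbell with SET_ARM_BIT; it is the
consumer's job to close the race between the last poll and re-arming. For
reference, the classic verbs pattern this enables (standard API, not
mana-specific; process_one_completion() is a placeholder for consumer
logic):

static void example_cq_handler(struct ib_cq *cq, void *ctx)
{
    struct ib_wc wc;

    do {
        /* Drain everything that is already completed. */
        while (ib_poll_cq(cq, 1, &wc) > 0)
            process_one_completion(&wc);
        /* Re-arm; a positive return means completions may have
         * slipped in between the drain and the arm, so poll again.
         */
    } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
                  IB_CQ_REPORT_MISSED_EVENTS) > 0);
}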
[PATCH rdma-next 13/13] RDMA/mana_ib: indicate CM support
From: Konstantin Taranov

Set max_mad_size and IB_PORT_CM_SUP capability
to enable connection manager.

Signed-off-by: Konstantin Taranov
Reviewed-by: Shiraz Saleem
---
 drivers/infiniband/hw/mana/main.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 114e391..ae1fb69 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -561,8 +561,10 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
     immutable->pkey_tbl_len = attr.pkey_tbl_len;
     immutable->gid_tbl_len = attr.gid_tbl_len;
     immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
-    if (port_num == 1)
+    if (port_num == 1) {
         immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+    }
 
     return 0;
 }
@@ -621,8 +623,11 @@ int mana_ib_query_port(struct ib_device *ibdev, u32 port,
     props->active_width = IB_WIDTH_4X;
     props->active_speed = IB_SPEED_EDR;
     props->pkey_tbl_len = 1;
-    if (port == 1)
+    if (port == 1) {
         props->gid_tbl_len = 16;
+        props->port_cap_flags = IB_PORT_CM_SUP;
+        props->ip_gids = true;
+    }
 
     return 0;
 }
--
2.43.0
[PATCH rdma-next 04/13] RDMA/mana_ib: create kernel-level CQs
From: Konstantin Taranov

Implement creation of CQs for the kernel.

Signed-off-by: Konstantin Taranov
Reviewed-by: Shiraz Saleem
---
 drivers/infiniband/hw/mana/cq.c | 80 +
 1 file changed, 52 insertions(+), 28 deletions(-)

diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index f04a679..d26d82d 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -15,42 +15,57 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
     struct ib_device *ibdev = ibcq->device;
     struct mana_ib_create_cq ucmd = {};
     struct mana_ib_dev *mdev;
+    struct gdma_context *gc;
     bool is_rnic_cq;
     u32 doorbell;
+    u32 buf_size;
     int err;
 
     mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+    gc = mdev_to_gc(mdev);
 
     cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
     cq->cq_handle = INVALID_MANA_HANDLE;
 
-    if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
-        return -EINVAL;
+    if (udata) {
+        if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
+            return -EINVAL;
 
-    err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
-    if (err) {
-        ibdev_dbg(ibdev,
-              "Failed to copy from udata for create cq, %d\n", err);
-        return err;
-    }
+        err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+        if (err) {
+            ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
+            return err;
+        }
 
-    is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
+        is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
 
-    if (!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) {
-        ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
-        return -EINVAL;
-    }
+        if (!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) {
+            ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
+            return -EINVAL;
+        }
 
-    cq->cqe = attr->cqe;
-    err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, &cq->queue);
-    if (err) {
-        ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
-        return err;
-    }
+        cq->cqe = attr->cqe;
+        err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
+                       &cq->queue);
+        if (err) {
+            ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
+            return err;
+        }
 
-    mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
-                          ibucontext);
-    doorbell = mana_ucontext->doorbell;
+        mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+                              ibucontext);
+        doorbell = mana_ucontext->doorbell;
+    } else {
+        is_rnic_cq = true;
+        buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
+        cq->cqe = buf_size / COMP_ENTRY_SIZE;
+        err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
+        if (err) {
+            ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
+            return err;
+        }
+        doorbell = gc->mana_ib.doorbell;
+    }
 
     if (is_rnic_cq) {
         err = mana_ib_gd_create_cq(mdev, cq, doorbell);
@@ -66,11 +81,13 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
         }
     }
 
-    resp.cqid = cq->queue.id;
-    err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
-    if (err) {
-        ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
-        goto err_remove_cq_cb;
+    if (udata) {
+        resp.cqid = cq->queue.id;
+        err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+        if (err) {
+            ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+            goto err_remove_cq_cb;
+        }
     }
 
     return 0;
@@ -122,7 +139
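The kernel path sizes the ring by rounding the byte size up to a power of
two and page-aligning it, then derives the effective CQE count back from
that, so cq->cqe can end up larger than attr->cqe. A standalone arithmetic
illustration, assuming a COMP_ENTRY_SIZE of 64 bytes and 4 KiB MANA pages
(the driver's actual constants may differ):

#include <stdio.h>
#include <stdint.h>

#define COMP_ENTRY_SIZE 64u      /* assumed CQE size in bytes */
#define MANA_PAGE_SIZE  4096u    /* assumed MANA page size    */

static uint32_t roundup_pow_of_two_u32(uint32_t x)
{
    uint32_t r = 1;

    while (r < x)
        r <<= 1;
    return r;
}

static uint32_t mana_page_align(uint32_t x)
{
    return (x + MANA_PAGE_SIZE - 1) & ~(MANA_PAGE_SIZE - 1);
}

int main(void)
{
    uint32_t cqe = 100;    /* attr->cqe requested by the caller */
    uint32_t buf_size =
        mana_page_align(roundup_pow_of_two_u32(cqe * COMP_ENTRY_SIZE));

    /* 100 * 64 = 6400 -> 8192 (pow2) -> 8192 (aligned) -> 128 CQEs */
    printf("buf_size=%u effective cqe=%u\n",
           buf_size, buf_size / COMP_ENTRY_SIZE);
    return 0;
}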
[PATCH rdma-next 06/13] RDMA/mana_ib: UD/GSI QP creation for kernel
From: Konstantin Taranov

Implement UD/GSI QPs for the kernel.
Allow create/modify/destroy for such QPs.

Signed-off-by: Konstantin Taranov
Reviewed-by: Shiraz Saleem
---
 drivers/infiniband/hw/mana/qp.c | 115
 1 file changed, 115 insertions(+)

diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 73d67c8..fea45be 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -398,6 +398,52 @@ err_free_vport:
     return err;
 }
 
+static u32 mana_ib_wqe_size(u32 sge, u32 oob_size)
+{
+    u32 wqe_size = sge * sizeof(struct gdma_sge) + sizeof(struct gdma_wqe) + oob_size;
+
+    return ALIGN(wqe_size, GDMA_WQE_BU_SIZE);
+}
+
+static u32 mana_ib_queue_size(struct ib_qp_init_attr *attr, u32 queue_type)
+{
+    u32 queue_size;
+
+    switch (attr->qp_type) {
+    case IB_QPT_UD:
+    case IB_QPT_GSI:
+        if (queue_type == MANA_UD_SEND_QUEUE)
+            queue_size = attr->cap.max_send_wr *
+                mana_ib_wqe_size(attr->cap.max_send_sge, INLINE_OOB_LARGE_SIZE);
+        else
+            queue_size = attr->cap.max_recv_wr *
+                mana_ib_wqe_size(attr->cap.max_recv_sge, INLINE_OOB_SMALL_SIZE);
+        break;
+    default:
+        return 0;
+    }
+
+    return MANA_PAGE_ALIGN(roundup_pow_of_two(queue_size));
+}
+
+static enum gdma_queue_type mana_ib_queue_type(struct ib_qp_init_attr *attr, u32 queue_type)
+{
+    enum gdma_queue_type type;
+
+    switch (attr->qp_type) {
+    case IB_QPT_UD:
+    case IB_QPT_GSI:
+        if (queue_type == MANA_UD_SEND_QUEUE)
+            type = GDMA_SQ;
+        else
+            type = GDMA_RQ;
+        break;
+    default:
+        type = GDMA_INVALID_QUEUE;
+    }
+    return type;
+}
+
 static int mana_table_store_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
 {
     refcount_set(&qp->refcount, 1);
@@ -490,6 +536,51 @@ destroy_queues:
     return err;
 }
 
+static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
+                struct ib_qp_init_attr *attr, struct ib_udata *udata)
+{
+    struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+    struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+    struct gdma_context *gc = mdev_to_gc(mdev);
+    u32 doorbell, queue_size;
+    int i, err;
+
+    if (udata) {
+        ibdev_dbg(&mdev->ib_dev, "User-level UD QPs are not supported, %d\n", err);
+        return -EINVAL;
+    }
+
+    for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i) {
+        queue_size = mana_ib_queue_size(attr, i);
+        err = mana_ib_create_kernel_queue(mdev, queue_size, mana_ib_queue_type(attr, i),
+                          &qp->ud_qp.queues[i]);
+        if (err) {
+            ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n",
+                  i, err);
+            goto destroy_queues;
+        }
+    }
+    doorbell = gc->mana_ib.doorbell;
+
+    err = mana_ib_gd_create_ud_qp(mdev, qp, attr, doorbell, attr->qp_type);
+    if (err) {
+        ibdev_err(&mdev->ib_dev, "Failed to create ud qp  %d\n", err);
+        goto destroy_queues;
+    }
+    qp->ibqp.qp_num = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+    qp->port = attr->port_num;
+
+    for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
+        qp->ud_qp.queues[i].kmem->id = qp->ud_qp.queues[i].id;
+
+    return 0;
+
+destroy_queues:
+    while (i-- > 0)
+        mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
+    return err;
+}
+
 int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
               struct ib_udata *udata)
 {
@@ -503,6 +594,9 @@ int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
         return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata);
     case IB_QPT_RC:
         return mana_ib_create_rc_qp(ibqp, ibqp->pd, attr, udata);
+    case IB_QPT_UD:
+    case IB_QPT_GSI:
+        return mana_ib_create_ud_qp(ibqp, ibqp->pd, attr, udata);
     default:
         ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n",
               attr->qp_type);
@@ -579,6 +673,8 @@ int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 {
     switch (ibqp->qp_type) {
     case IB_QPT_RC:
+    case IB_QPT_UD:
+    case IB_QPT_GSI:
         return mana_ib_gd_modify_qp(ibqp, attr, attr_mask, udata);
     defa
[PATCH rdma-next 09/13] RDMA/mana_ib: UD/GSI work requests
From: Konstantin Taranov

Implement post send and post recv for UD/GSI QPs.
Add information about posted requests into shadow queues.

Co-developed-by: Shiraz Saleem
Signed-off-by: Shiraz Saleem
Signed-off-by: Konstantin Taranov
---
 drivers/infiniband/hw/mana/Makefile           |   2 +-
 drivers/infiniband/hw/mana/device.c           |   2 +
 drivers/infiniband/hw/mana/mana_ib.h          |  33
 drivers/infiniband/hw/mana/qp.c               |  21 ++-
 drivers/infiniband/hw/mana/shadow_queue.h     | 115
 drivers/infiniband/hw/mana/wr.c               | 168 ++
 .../net/ethernet/microsoft/mana/gdma_main.c   |   2 +
 7 files changed, 341 insertions(+), 2 deletions(-)
 create mode 100644 drivers/infiniband/hw/mana/shadow_queue.h
 create mode 100644 drivers/infiniband/hw/mana/wr.c

diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile
index 6e56f77..79426e7 100644
--- a/drivers/infiniband/hw/mana/Makefile
+++ b/drivers/infiniband/hw/mana/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o
 
-mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o
+mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index d534ef1..1da86c3 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -40,6 +40,8 @@ static const struct ib_device_ops mana_ib_dev_ops = {
     .mmap = mana_ib_mmap,
     .modify_qp = mana_ib_modify_qp,
     .modify_wq = mana_ib_modify_wq,
+    .post_recv = mana_ib_post_recv,
+    .post_send = mana_ib_post_send,
     .query_device = mana_ib_query_device,
     .query_gid = mana_ib_query_gid,
     .query_pkey = mana_ib_query_pkey,
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 7b079d8..6265c39 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -14,6 +14,7 @@
 #include
 #include
 
+#include "shadow_queue.h"
 
 #define PAGE_SZ_BM \
     (SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K |\
@@ -165,6 +166,9 @@ struct mana_ib_qp {
     /* The port on the IB device, starting with 1 */
     u32 port;
 
+    struct shadow_queue shadow_rq;
+    struct shadow_queue shadow_sq;
+
     refcount_t refcount;
     struct completion free;
 };
@@ -404,6 +408,30 @@ struct mana_rnic_set_qp_state_resp {
     struct gdma_resp_hdr hdr;
 }; /* HW Data */
 
+enum WQE_OPCODE_TYPES {
+    WQE_TYPE_UD_SEND = 0,
+    WQE_TYPE_UD_RECV = 8,
+}; /* HW DATA */
+
+struct rdma_send_oob {
+    u32 wqe_type    : 5;
+    u32 fence       : 1;
+    u32 signaled    : 1;
+    u32 solicited   : 1;
+    u32 psn         : 24;
+
+    u32 ssn_or_rqpn : 24;
+    u32 reserved1   : 8;
+    union {
+        struct {
+            u32 remote_qkey;
+            u32 immediate;
+            u32 reserved1;
+            u32 reserved2;
+        } ud_send;
+    };
+}; /* HW DATA */
+
 static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
 {
     return mdev->gdma_dev->gdma_context;
@@ -562,4 +590,9 @@ int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
 int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
               struct ib_udata *udata);
 int mana_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+              const struct ib_recv_wr **bad_wr);
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+              const struct ib_send_wr **bad_wr);
 #endif
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index fea45be..051ea03 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -562,10 +562,23 @@ static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
     }
     doorbell = gc->mana_ib.doorbell;
 
+    err = create_shadow_queue(&qp->shadow_rq, attr->cap.max_recv_wr,
+                  sizeof(struct ud_rq_shadow_wqe));
+    if (err) {
+        ibdev_err(&mdev->ib_dev, "Failed to create shadow rq err %d\n", err);
+        goto destroy_queues;
+    }
+    err = create_shadow_queue(&qp->shadow_sq, attr->cap.max_send_wr,
+                  sizeof(struct ud_sq_shadow_wqe));
+    if (err) {
+        ibdev_err(&mdev->ib_dev, "Failed to create shadow sq err %d\n", err);
+        goto destroy_shadow_queues;
+    }
+
+    err = mana_ib_gd_create_ud_qp(mdev, qp, attr, doo
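shadow_queue.h is added by this patch but not quoted above. Conceptually it
is a fixed-size ring with three cursors: a producer index (advanced by
post_send/post_recv), a next-to-complete index (advanced when the matching
HW CQE is seen), and a consumer index (advanced when the completion is
reported through poll_cq). A minimal sketch of such a ring under those
assumptions (the real header's layout may differ):

struct shadow_wqe_header {
    u64 wr_id;
    u32 opcode;
    u32 posted_wqe_size;
    u32 error_code;
};

struct shadow_queue {
    void *buffer;
    u64 prod_idx;              /* next free slot to post into     */
    u64 next_to_complete_idx;  /* next slot awaiting a HW CQE     */
    u64 cons_idx;              /* next slot to report via poll_cq */
    u32 length;                /* entries, power of two           */
    u32 stride;                /* bytes per entry                 */
};

static void *shadow_queue_entry(struct shadow_queue *q, u64 idx)
{
    return (u8 *)q->buffer + (idx % q->length) * q->stride;
}

/* NULL when the ring is full. */
static void *shadow_queue_producer_entry(struct shadow_queue *q)
{
    if (q->prod_idx - q->cons_idx >= q->length)
        return NULL;
    return shadow_queue_entry(q, q->prod_idx);
}

/* NULL when every posted WQE already has its CQE. */
static void *shadow_queue_get_next_to_complete(struct shadow_queue *q)
{
    if (q->next_to_complete_idx == q->prod_idx)
        return NULL;
    return shadow_queue_entry(q, q->next_to_complete_idx);
}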
[PATCH rdma-next 12/13] RDMA/mana_ib: polling of CQs for GSI/UD
From: Konstantin Taranov

Add polling for the kernel CQs.
Process completion events for UD/GSI QPs.

Signed-off-by: Konstantin Taranov
Reviewed-by: Shiraz Saleem
---
 drivers/infiniband/hw/mana/cq.c               | 135 ++
 drivers/infiniband/hw/mana/device.c           |   1 +
 drivers/infiniband/hw/mana/mana_ib.h          |  32 +
 drivers/infiniband/hw/mana/qp.c               |  33 +
 .../net/ethernet/microsoft/mana/gdma_main.c   |   1 +
 5 files changed, 202 insertions(+)

diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index 82f1462..5c325ef 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -90,6 +90,10 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
         }
     }
 
+    spin_lock_init(&cq->cq_lock);
+    INIT_LIST_HEAD(&cq->list_send_qp);
+    INIT_LIST_HEAD(&cq->list_recv_qp);
+
     return 0;
 
 err_remove_cq_cb:
@@ -180,3 +184,134 @@ int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
     mana_gd_ring_cq(gdma_cq, SET_ARM_BIT);
     return 0;
 }
+
+static inline void handle_ud_sq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+    struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+    struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+    struct ud_sq_shadow_wqe *shadow_wqe;
+
+    shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq);
+    if (!shadow_wqe)
+        return;
+
+    shadow_wqe->header.error_code = rdma_cqe->ud_send.vendor_error;
+
+    wq->tail += shadow_wqe->header.posted_wqe_size;
+    shadow_queue_advance_next_to_complete(&qp->shadow_sq);
+}
+
+static inline void handle_ud_rq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+    struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+    struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+    struct ud_rq_shadow_wqe *shadow_wqe;
+
+    shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_rq);
+    if (!shadow_wqe)
+        return;
+
+    shadow_wqe->byte_len = rdma_cqe->ud_recv.msg_len;
+    shadow_wqe->src_qpn = rdma_cqe->ud_recv.src_qpn;
+    shadow_wqe->header.error_code = IB_WC_SUCCESS;
+
+    wq->tail += shadow_wqe->header.posted_wqe_size;
+    shadow_queue_advance_next_to_complete(&qp->shadow_rq);
+}
+
+static void mana_handle_cqe(struct mana_ib_dev *mdev, struct gdma_comp *cqe)
+{
+    struct mana_ib_qp *qp = mana_get_qp_ref(mdev, cqe->wq_num, cqe->is_sq);
+
+    if (!qp)
+        return;
+
+    if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) {
+        if (cqe->is_sq)
+            handle_ud_sq_cqe(qp, cqe);
+        else
+            handle_ud_rq_cqe(qp, cqe);
+    }
+
+    mana_put_qp_ref(qp);
+}
+
+static void fill_verbs_from_shadow_wqe(struct mana_ib_qp *qp, struct ib_wc *wc,
+                       const struct shadow_wqe_header *shadow_wqe)
+{
+    const struct ud_rq_shadow_wqe *ud_wqe = (const struct ud_rq_shadow_wqe *)shadow_wqe;
+
+    wc->wr_id = shadow_wqe->wr_id;
+    wc->status = shadow_wqe->error_code;
+    wc->opcode = shadow_wqe->opcode;
+    wc->vendor_err = shadow_wqe->error_code;
+    wc->wc_flags = 0;
+    wc->qp = &qp->ibqp;
+    wc->pkey_index = 0;
+
+    if (shadow_wqe->opcode == IB_WC_RECV) {
+        wc->byte_len = ud_wqe->byte_len;
+        wc->src_qp = ud_wqe->src_qpn;
+        wc->wc_flags |= IB_WC_GRH;
+    }
+}
+
+static int mana_process_completions(struct mana_ib_cq *cq, int nwc, struct ib_wc *wc)
+{
+    struct shadow_wqe_header *shadow_wqe;
+    struct mana_ib_qp *qp;
+    int wc_index = 0;
+
+    /* process send shadow queue completions */
+    list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
+        while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_sq))
+               != NULL) {
+            if (wc_index >= nwc)
+                goto out;
+
+            fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+            shadow_queue_advance_consumer(&qp->shadow_sq);
+            wc_index++;
+        }
+    }
+
+    /* process recv shadow queue completions */
+    list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
+        while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_rq))
+               != NULL) {
+            if (wc_index >= nwc)
+                goto out;
+
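From the consumer's perspective the result is ordinary verbs polling;
status, vendor_err, byte_len and src_qp surface straight from the shadow
entries filled in above. A standard in-kernel drain loop around the new
.poll_cq (ib_poll_cq is the core wrapper; illustrative only):

static int example_drain_cq(struct ib_cq *cq)
{
    struct ib_wc wc[8];
    int n, i, total = 0;

    /* Batch-poll until the CQ (i.e. the shadow queues) is empty. */
    while ((n = ib_poll_cq(cq, 8, wc)) > 0) {
        for (i = 0; i < n; i++) {
            if (wc[i].status != IB_WC_SUCCESS)
                pr_warn("wr_id %llu: status %d vendor 0x%x\n",
                    (unsigned long long)wc[i].wr_id,
                    wc[i].status, wc[i].vendor_err);
            total++;
        }
    }
    return total;
}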
RE: [PATCH rdma-next 04/13] RDMA/mana_ib: create kernel-level CQs
> Subject: RE: [PATCH rdma-next 04/13] RDMA/mana_ib: create kernel-level
> CQs
>
> > Subject: [PATCH rdma-next 04/13] RDMA/mana_ib: create kernel-level CQs
> >
> > From: Konstantin Taranov
> >
> > Implement creation of CQs for the kernel.
> >
> > Signed-off-by: Konstantin Taranov
> > Reviewed-by: Shiraz Saleem
> > ---
> >  drivers/infiniband/hw/mana/cq.c | 80 +
> >  1 file changed, 52 insertions(+), 28 deletions(-)
> >
> > diff --git a/drivers/infiniband/hw/mana/cq.c
> > b/drivers/infiniband/hw/mana/cq.c
> > index f04a679..d26d82d 100644
> > --- a/drivers/infiniband/hw/mana/cq.c
> > +++ b/drivers/infiniband/hw/mana/cq.c
> > @@ -15,42 +15,57 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const
> > struct ib_cq_init_attr *attr,
> >      struct ib_device *ibdev = ibcq->device;
> >      struct mana_ib_create_cq ucmd = {};
> >      struct mana_ib_dev *mdev;
> > +    struct gdma_context *gc;
> >      bool is_rnic_cq;
> >      u32 doorbell;
> > +    u32 buf_size;
> >      int err;
> >
> >      mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> > +    gc = mdev_to_gc(mdev);
> >
> >      cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
> >      cq->cq_handle = INVALID_MANA_HANDLE;
> >
> > -    if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> > -        return -EINVAL;
> > +    if (udata) {
> > +        if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> > +            return -EINVAL;
> >
> > -    err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
> > -    if (err) {
> > -        ibdev_dbg(ibdev,
> > -              "Failed to copy from udata for create cq, %d\n", err);
> > -        return err;
> > -    }
> > +        err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
> > +        if (err) {
> > +            ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
> > +            return err;
> > +        }
> >
> > -    is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
> > +        is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
> >
> > -    if (!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) {
> > -        ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
> > -        return -EINVAL;
> > -    }
> > +        if (!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) {
> > +            ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
> > +            return -EINVAL;
> > +        }
> >
> > -    cq->cqe = attr->cqe;
> > -    err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, &cq->queue);
> > -    if (err) {
> > -        ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
> > -        return err;
> > -    }
> > +        cq->cqe = attr->cqe;
> > +        err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
> > +                       &cq->queue);
> > +        if (err) {
> > +            ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
> > +            return err;
> > +        }
> >
> > -    mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
> > -                          ibucontext);
> > -    doorbell = mana_ucontext->doorbell;
> > +        mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
> > +                              ibucontext);
> > +        doorbell = mana_ucontext->doorbell;
> > +    } else {
> > +        is_rnic_cq = true;
> > +        buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
> > +        cq->cqe = buf_size / COMP_ENTRY_SIZE;
> > +        err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
> > +        if (err) {
> > +            ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
> > +            return err;
> > +        }
>