From: xiongweimin <[email protected]>

This commit adds essential RDMA verbs implementation for the virtio RDMA driver:

1. Port modification support (vrdma_modify_port):
   - Adds IB_PORT_SHUTDOWN flag handling for port deactivation
   - Maintains port capability mask state
   - Enforces strict attribute mask validation
   - Provides proper locking with port_mutex

2. Queue Pair modification support (vrdma_modify_qp):
   - Implements full QP attribute translation to virtio commands
   - Handles all standard IB_QP_* attribute masks (21 bits)
   - Rejects attribute bits outside IB_QP_ATTR_STANDARD_BITS with -EOPNOTSUPP
   - Uses efficient two-buffer scheme for device communication
   - Includes comprehensive error handling

Key features:
- Minimal port modification support focused on shutdown capability
- Complete QP state transition handling
- Attribute-by-attribute translation with 32+ fields covered
- Safe memory management with guaranteed cleanup
- Verbose error logging for debugging

Signed-off-by: Xiong Weimin <[email protected]>
---
 .../infiniband/hw/virtio/vrdma_dev_api.h      |  14 +
 .../drivers/infiniband/hw/virtio/vrdma_ib.c   | 227 +++++++++++++++++-
 .../drivers/infiniband/hw/virtio/vrdma_ib.h   |  58 +++++
 3 files changed, 298 insertions(+), 1 deletion(-)

diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev_api.h b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev_api.h
index 84dc05a96..d0ce02601 100644
--- a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev_api.h
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_dev_api.h
@@ -14,6 +14,8 @@
 
 #include <rdma/vrdma_abi.h>
 
+#include "vrdma_ib.h"
+
 /**
  * struct vrdma_config - Virtio RDMA device configuration
  *
@@ -213,6 +215,18 @@ struct vrdma_rsp_map_mr_sg {
 	__u32 npages;
 };
 
+/* Request payload of VIRTIO_RDMA_CMD_MODIFY_QP. */
+struct vrdma_cmd_modify_qp {
+	__u32 qpn;
+	__u32 attr_mask;
+	struct vrdma_qp_attr attrs;
+};
+
+/* Response payload of VIRTIO_RDMA_CMD_MODIFY_QP. */
+struct vrdma_rsp_modify_qp {
+	__u32 qpn;
+};
+
 #define VRDMA_CTRL_OK 0
 #define VRDMA_CTRL_ERR 1
 
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.c b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.c
index 738935e3d..2d9a612f3 100644
--- a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.c
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.c
@@ -55,6 +55,39 @@ static const char * const cmd_str[] = {
 	[VIRTIO_RDMA_CMD_REQ_NOTIFY_CQ] = "REQ_NOTIFY_CQ",
 };
 
+/* Copy the queue capacity limits of @src into the wire-format @dst. */
+static void ib_qp_cap_to_vrdma(struct vrdma_qp_cap *dst, const struct ib_qp_cap *src)
+{
+	dst->max_send_wr = src->max_send_wr;
+	dst->max_recv_wr = src->max_recv_wr;
+	dst->max_send_sge = src->max_send_sge;
+	dst->max_recv_sge = src->max_recv_sge;
+	dst->max_inline_data = src->max_inline_data;
+}
+
+/* Copy the GRH routing fields of @src into the wire-format @dst. */
+static void ib_global_route_to_vrdma(struct vrdma_global_route *dst,
+				     const struct ib_global_route *src)
+{
+	dst->dgid = src->dgid;
+	dst->flow_label = src->flow_label;
+	dst->sgid_index = src->sgid_index;
+	dst->hop_limit = src->hop_limit;
+	dst->traffic_class = src->traffic_class;
+}
+
+/* Translate an address handle attribute into its wire-format counterpart. */
+static void rdma_ah_attr_to_vrdma(struct vrdma_ah_attr *dst,
+				  const struct rdma_ah_attr *src)
+{
+	ib_global_route_to_vrdma(&dst->grh, rdma_ah_read_grh(src));
+	dst->sl = rdma_ah_get_sl(src);
+	dst->static_rate = rdma_ah_get_static_rate(src);
+	dst->port_num = rdma_ah_get_port_num(src);
+	dst->ah_flags = rdma_ah_get_ah_flags(src);
+	memcpy(&dst->roce, &src->roce, sizeof(struct roce_ah_attr));
+}
+
 /**
  * vrdma_exec_verbs_cmd - Execute a verbs command via control virtqueue
  * @vrdev: VRDMA device
@@ -1922,6 +1955,196 @@ void vrdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
 	kfree(entry);
 }
 
+/**
+ * vrdma_modify_port - Modify port attributes (limited support)
+ * @ibdev: Verbs device
+ * @port: Port number (1-indexed)
+ * @mask: Bitmask of attributes to modify
+ * @props: New port properties
+ *
+ * Currently only supports IB_PORT_SHUTDOWN flag.
+ * Other flags are rejected with -EOPNOTSUPP.
+ *
+ * Context: Can sleep (holds mutex).
+ * Return:
+ * * 0 on success
+ * * -EOPNOTSUPP if unsupported mask bits set
+ * * error code from ib_query_port() on failure
+ */
+static int vrdma_modify_port(struct ib_device *ibdev, u32 port, int mask,
+			     struct ib_port_modify *props)
+{
+	struct vrdma_dev *vdev = to_vdev(ibdev);
+	struct ib_port_attr attr;
+	int ret;
+
+	/* Only allow IB_PORT_SHUTDOWN; reject all others */
+	if (mask & ~IB_PORT_SHUTDOWN) {
+		pr_warn("vRDMA: unsupported port modify mask %#x\n", mask);
+		return -EOPNOTSUPP;
+	}
+
+	mutex_lock(&vdev->port_mutex);
+
+	/* Query current port state (required by spec before modify in some cases) */
+	ret = ib_query_port(ibdev, port, &attr);
+	if (ret) {
+		pr_err("vRDMA: failed to query port %u: %d\n", port, ret);
+		goto out_unlock;
+	}
+
+	/* Apply capability mask changes */
+	vdev->port_cap_mask |= props->set_port_cap_mask;
+	vdev->port_cap_mask &= ~props->clr_port_cap_mask;
+
+	/* Handle shutdown request */
+	if (mask & IB_PORT_SHUTDOWN) {
+		vdev->ib_active = false;
+		pr_info("vRDMA: port %u marked as inactive\n", port);
+	}
+
+	ret = 0; /* Success */
+
+out_unlock:
+	mutex_unlock(&vdev->port_mutex);
+	return ret;
+}
+
+/**
+ * vrdma_modify_qp - Modify QP attributes via backend
+ * @ibqp: Queue pair to modify
+ * @attr: New QP attributes
+ * @attr_mask: Which fields in @attr are valid
+ * @udata: User data (unused here)
+ *
+ * Sends a VIRTIO_RDMA_CMD_MODIFY_QP command to the host backend
+ * to update the QP's state and parameters.
+ *
+ * Context: Process context (may sleep due to memory allocation).
+ * Return:
+ * * 0 on success
+ * * -EOPNOTSUPP if @attr_mask has bits outside IB_QP_ATTR_STANDARD_BITS
+ * * -ENOMEM if command buffer allocation fails
+ * * -EIO or other negative errno on communication failure
+ */
+static int vrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+			   int attr_mask, struct ib_udata *udata)
+{
+	struct vrdma_dev *vdev = to_vdev(ibqp->device);
+	struct vrdma_cmd_modify_qp *cmd;
+	struct vrdma_rsp_modify_qp *rsp;
+	struct scatterlist in, out;
+	int rc;
+
+	/* Reject bits the command cannot carry rather than silently dropping them */
+	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+		return -EOPNOTSUPP;
+
+	/* Allocate command and response buffers */
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
+	if (!rsp) {
+		kfree(cmd);
+		return -ENOMEM;
+	}
+
+	/* Fill command header */
+	cmd->qpn = to_vqp(ibqp)->qp_handle;
+	cmd->attr_mask = attr_mask;
+
+	/* Copy only the fields selected by attr_mask; the rest stay zeroed */
+	if (attr_mask & IB_QP_STATE)
+		cmd->attrs.qp_state = attr->qp_state;
+
+	if (attr_mask & IB_QP_CUR_STATE)
+		cmd->attrs.cur_qp_state = attr->cur_qp_state;
+
+	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
+		cmd->attrs.en_sqd_async_notify = attr->en_sqd_async_notify;
+
+	if (attr_mask & IB_QP_ACCESS_FLAGS)
+		cmd->attrs.qp_access_flags = attr->qp_access_flags;
+
+	if (attr_mask & IB_QP_PKEY_INDEX)
+		cmd->attrs.pkey_index = attr->pkey_index;
+
+	if (attr_mask & IB_QP_PORT)
+		cmd->attrs.port_num = attr->port_num;
+
+	if (attr_mask & IB_QP_QKEY)
+		cmd->attrs.qkey = attr->qkey;
+
+	if (attr_mask & IB_QP_AV)
+		rdma_ah_attr_to_vrdma(&cmd->attrs.ah_attr, &attr->ah_attr);
+
+	if (attr_mask & IB_QP_ALT_PATH) {
+		rdma_ah_attr_to_vrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr);
+		cmd->attrs.alt_pkey_index = attr->alt_pkey_index;
+		cmd->attrs.alt_port_num = attr->alt_port_num;
+		cmd->attrs.alt_timeout = attr->alt_timeout;
+	}
+
+	if (attr_mask & IB_QP_PATH_MTU)
+		cmd->attrs.path_mtu = attr->path_mtu;
+
+	if (attr_mask & IB_QP_TIMEOUT)
+		cmd->attrs.timeout = attr->timeout;
+
+	if (attr_mask & IB_QP_RETRY_CNT)
+		cmd->attrs.retry_cnt = attr->retry_cnt;
+
+	if (attr_mask & IB_QP_RNR_RETRY)
+		cmd->attrs.rnr_retry = attr->rnr_retry;
+
+	if (attr_mask & IB_QP_MIN_RNR_TIMER)
+		cmd->attrs.min_rnr_timer = attr->min_rnr_timer;
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+		cmd->attrs.max_rd_atomic = attr->max_rd_atomic;
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		cmd->attrs.max_dest_rd_atomic = attr->max_dest_rd_atomic;
+
+	if (attr_mask & IB_QP_PATH_MIG_STATE)
+		cmd->attrs.path_mig_state = attr->path_mig_state;
+
+	if (attr_mask & IB_QP_CAP)
+		ib_qp_cap_to_vrdma(&cmd->attrs.cap, &attr->cap);
+
+	if (attr_mask & IB_QP_DEST_QPN)
+		cmd->attrs.dest_qp_num = attr->dest_qp_num;
+
+	if (attr_mask & IB_QP_RQ_PSN)
+		cmd->attrs.rq_psn = attr->rq_psn;
+
+	if (attr_mask & IB_QP_SQ_PSN)
+		cmd->attrs.sq_psn = attr->sq_psn;
+
+	/* Prepare scatterlists for virtqueue I/O */
+	sg_init_one(&in, cmd, sizeof(*cmd));
+	sg_init_one(&out, rsp, sizeof(*rsp));
+
+	/* Send command to backend */
+	rc = vrdma_exec_verbs_cmd(vdev, VIRTIO_RDMA_CMD_MODIFY_QP, &in, &out);
+	if (rc) {
+		dev_err(&vdev->vdev->dev,
+			"VIRTIO_RDMA_CMD_MODIFY_QP failed: qpn=0x%x, err=%d\n",
+			cmd->qpn, rc);
+		goto out_free;
+	}
+
+	/* Optional: Update local QP state based on response if needed */
+	// e.g., to_vqp(ibqp)->state = rsp->new_state;
+
+out_free:
+	kfree(rsp);
+	kfree(cmd);
+	return rc;
+}
+
 static const struct ib_device_ops vrdma_dev_ops = {
 	.owner = THIS_MODULE,
 	.uverbs_abi_ver = VIRTIO_RDMA_ABI_VERSION,
@@ -1950,7 +2173,9 @@ static const struct ib_device_ops vrdma_dev_ops = {
 	.get_link_layer = vrdma_port_link_layer,
 	.map_mr_sg = vrdma_map_mr_sg,
 	.mmap = vrdma_mmap,
-	.mmap_free = vrdma_mmap_free,
+	.mmap_free = vrdma_mmap_free,
+	.modify_port = vrdma_modify_port,
+	.modify_qp = vrdma_modify_qp,
 };
 
 /**
diff --git a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.h b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.h
index 6759c4349..eaff37c3c 100644
--- a/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.h
+++ b/linux-6.16.8/drivers/infiniband/hw/virtio/vrdma_ib.h
@@ -173,6 +173,64 @@ struct vrdma_qp {
 	struct vrdma_user_mmap_entry *rq_entry; /* Mmap entry for RQ buffer */
 };
 
+/* Global routing (GRH) fields carried inside struct vrdma_ah_attr. */
+struct vrdma_global_route {
+	union ib_gid dgid;
+	uint32_t flow_label;
+	uint8_t sgid_index;
+	uint8_t hop_limit;
+	uint8_t traffic_class;
+};
+
+/* Address handle attributes as placed in a modify-QP command. */
+struct vrdma_ah_attr {
+	struct vrdma_global_route grh;
+	uint8_t sl;
+	uint8_t static_rate;
+	uint8_t port_num;
+	uint8_t ah_flags;
+	struct roce_ah_attr roce;
+};
+
+/* Queue pair capacity limits as placed in a modify-QP command. */
+struct vrdma_qp_cap {
+	uint32_t max_send_wr;
+	uint32_t max_recv_wr;
+	uint32_t max_send_sge;
+	uint32_t max_recv_sge;
+	uint32_t max_inline_data;
+};
+
+/* QP attributes sent to the backend by VIRTIO_RDMA_CMD_MODIFY_QP. */
+struct vrdma_qp_attr {
+	enum ib_qp_state qp_state;
+	enum ib_qp_state cur_qp_state;
+	enum ib_mtu path_mtu;
+	enum ib_mig_state path_mig_state;
+	uint32_t qkey;
+	uint32_t rq_psn;
+	uint32_t sq_psn;
+	uint32_t dest_qp_num;
+	uint32_t qp_access_flags;
+	uint16_t pkey_index;
+	uint16_t alt_pkey_index;
+	uint8_t en_sqd_async_notify;
+	uint8_t sq_draining;
+	uint8_t max_rd_atomic;
+	uint8_t max_dest_rd_atomic;
+	uint8_t min_rnr_timer;
+	uint8_t port_num;
+	uint8_t timeout;
+	uint8_t retry_cnt;
+	uint8_t rnr_retry;
+	uint8_t alt_port_num;
+	uint8_t alt_timeout;
+	uint32_t rate_limit;
+	struct vrdma_qp_cap cap;
+	struct vrdma_ah_attr ah_attr;
+	struct vrdma_ah_attr alt_ah_attr;
+};
+
 /**
  * struct vrdma_mr - Software state of a Virtio-RDMA Memory Region (MR)
  * @ibmr: InfiniBand core MR object (contains rkey, lkey, etc.)
-- 
2.43.0
