On 6/20/19 8:03 AM, Jack Wang wrote:
+module_param_named(max_chunk_size, max_chunk_size, int, 0444);
+MODULE_PARM_DESC(max_chunk_size,
+ "Max size for each IO request, when change the unit is in byte"
+ " (default: " __stringify(DEFAULT_MAX_CHUNK_SIZE_KB) "KB)");
Where can I find the definition of DEFAULT_MAX_CHUNK_SIZE_KB?
+static char cq_affinity_list[256] = "";
No empty initializers for file-scope variables please.
+ pr_info("cq_affinity_list changed to %*pbl\n",
+ cpumask_pr_args(&cq_affinity_mask));
Should this pr_info() call perhaps be changed into pr_debug()?
+static bool __ibtrs_srv_change_state(struct ibtrs_srv_sess *sess,
+ enum ibtrs_srv_state new_state)
+{
+ enum ibtrs_srv_state old_state;
+ bool changed = false;
+
+ old_state = sess->state;
+ switch (new_state) {
Please add a lockdep_assert_held() statement that checks whether calls
of this function are serialized properly.
+/**
+ * rdma_write_sg() - response on successful READ request
+ */
+static int rdma_write_sg(struct ibtrs_srv_op *id)
+{
+ struct ibtrs_srv_sess *sess = to_srv_sess(id->con->c.sess);
+ dma_addr_t dma_addr = sess->dma_addr[id->msg_id];
+ struct ibtrs_srv *srv = sess->srv;
+ struct ib_send_wr inv_wr, imm_wr;
+ struct ib_rdma_wr *wr = NULL;
+ const struct ib_send_wr *bad_wr;
+ enum ib_send_flags flags;
+ size_t sg_cnt;
+ int err, i, offset;
+ bool need_inval;
+ u32 rkey = 0;
+
+ sg_cnt = le16_to_cpu(id->rd_msg->sg_cnt);
+ need_inval = le16_to_cpu(id->rd_msg->flags) & IBTRS_MSG_NEED_INVAL_F;
+ if (unlikely(!sg_cnt))
+ return -EINVAL;
+
+ offset = 0;
+ for (i = 0; i < sg_cnt; i++) {
+ struct ib_sge *list;
+
+ wr = &id->tx_wr[i];
+ list = &id->tx_sg[i];
+ list->addr = dma_addr + offset;
+ list->length = le32_to_cpu(id->rd_msg->desc[i].len);
+
+ /* WR will fail with length error
+ * if this is 0
+ */
+ if (unlikely(list->length == 0)) {
+ ibtrs_err(sess, "Invalid RDMA-Write sg list length
0\n");
+ return -EINVAL;
+ }
+
+ list->lkey = sess->s.dev->ib_pd->local_dma_lkey;
+ offset += list->length;
+
+ wr->wr.wr_cqe = &io_comp_cqe;
+ wr->wr.sg_list = list;
+ wr->wr.num_sge = 1;
+ wr->remote_addr = le64_to_cpu(id->rd_msg->desc[i].addr);
+ wr->rkey = le32_to_cpu(id->rd_msg->desc[i].key);
+ if (rkey == 0)
+ rkey = wr->rkey;
+ else
+ /* Only one key is actually used */
+ WARN_ON_ONCE(rkey != wr->rkey);
+
+ if (i < (sg_cnt - 1))
+ wr->wr.next = &id->tx_wr[i + 1].wr;
+ else if (need_inval)
+ wr->wr.next = &inv_wr;
+ else
+ wr->wr.next = &imm_wr;
+
+ wr->wr.opcode = IB_WR_RDMA_WRITE;
+ wr->wr.ex.imm_data = 0;
+ wr->wr.send_flags = 0;
+ }
+ /*
+ * From time to time we have to post signalled sends,
+ * or send queue will fill up and only QP reset can help.
+ */
+ flags = atomic_inc_return(&id->con->wr_cnt) % srv->queue_depth ?
+ 0 : IB_SEND_SIGNALED;
+
+ if (need_inval) {
+ inv_wr.next = &imm_wr;
+ inv_wr.wr_cqe = &io_comp_cqe;
+ inv_wr.sg_list = NULL;
+ inv_wr.num_sge = 0;
+ inv_wr.opcode = IB_WR_SEND_WITH_INV;
+ inv_wr.send_flags = 0;
+ inv_wr.ex.invalidate_rkey = rkey;
+ }
+ imm_wr.next = NULL;
+ imm_wr.wr_cqe = &io_comp_cqe;
+ imm_wr.sg_list = NULL;
+ imm_wr.num_sge = 0;
+ imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM;
+ imm_wr.send_flags = flags;
+ imm_wr.ex.imm_data = cpu_to_be32(ibtrs_to_io_rsp_imm(id->msg_id,
+ 0, need_inval));
+
+ ib_dma_sync_single_for_device(sess->s.dev->ib_dev, dma_addr,
+ offset, DMA_BIDIRECTIONAL);
+
+ err = ib_post_send(id->con->c.qp, &id->tx_wr[0].wr, &bad_wr);
+ if (unlikely(err))
+ ibtrs_err(sess,
+ "Posting RDMA-Write-Request to QP failed, err: %d\n",
+ err);
+
+ return err;
+}
All other RDMA server implementations use rdma_rw_ctx_init() and
rdma_rw_ctx_wrs(). Please use these functions in IBTRS too.
+static void ibtrs_srv_hb_err_handler(struct ibtrs_con *c, int err)
+{
+ (void)err;
+ close_sess(to_srv_sess(c->sess));
+}
Is the (void)err statement really necessary?
+static int ibtrs_srv_rdma_init(struct ibtrs_srv_ctx *ctx, unsigned int port)
+{
+ struct sockaddr_in6 sin = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_ANY_INIT,
+ .sin6_port = htons(port),
+ };
+ struct sockaddr_ib sib = {
+ .sib_family = AF_IB,
+ .sib_addr.sib_subnet_prefix = 0ULL,
+ .sib_addr.sib_interface_id = 0ULL,
+ .sib_sid = cpu_to_be64(RDMA_IB_IP_PS_IB | port),
+ .sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL),
+ .sib_pkey = cpu_to_be16(0xffff),
+ };
+ struct rdma_cm_id *cm_ip, *cm_ib;
+ int ret;
+
+ /*
+ * We accept both IPoIB and IB connections, so we need to keep
+ * two cm id's, one for each socket type and port space.
+ * If the cm initialization of one of the id's fails, we abort
+ * everything.
+ */
+ cm_ip = ibtrs_srv_cm_init(ctx, (struct sockaddr *)&sin, RDMA_PS_TCP);
+ if (unlikely(IS_ERR(cm_ip)))
+ return PTR_ERR(cm_ip);
+
+ cm_ib = ibtrs_srv_cm_init(ctx, (struct sockaddr *)&sib, RDMA_PS_IB);
+ if (unlikely(IS_ERR(cm_ib))) {
+ ret = PTR_ERR(cm_ib);
+ goto free_cm_ip;
+ }
+
+ ctx->cm_id_ip = cm_ip;
+ ctx->cm_id_ib = cm_ib;
+
+ return 0;
+
+free_cm_ip:
+ rdma_destroy_id(cm_ip);
+
+ return ret;
+}
Will the above work if CONFIG_IPV6=n?
+static int __init ibtrs_server_init(void)
+{
+ int err;
+
+ if (!strlen(cq_affinity_list))
+ init_cq_affinity();
Is the above if-test useful? Can that if-test be left out?
Thanks,
Bart.