On 6/20/19 8:03 AM, Jack Wang wrote:
+module_param_named(max_chunk_size, max_chunk_size, int, 0444);
+MODULE_PARM_DESC(max_chunk_size,
+                "Max size for each IO request, when change the unit is in byte"
+                " (default: " __stringify(DEFAULT_MAX_CHUNK_SIZE_KB) "KB)");

Where can I find the definition of DEFAULT_MAX_CHUNK_SIZE_KB?

+static char cq_affinity_list[256] = "";

Please do not explicitly initialize file-scope variables to an empty value; they are zero-initialized by default.

+       pr_info("cq_affinity_list changed to %*pbl\n",
+               cpumask_pr_args(&cq_affinity_mask));

Should this pr_info() call perhaps be changed into pr_debug()?

+static bool __ibtrs_srv_change_state(struct ibtrs_srv_sess *sess,
+                                    enum ibtrs_srv_state new_state)
+{
+       enum ibtrs_srv_state old_state;
+       bool changed = false;
+
+       old_state = sess->state;
+       switch (new_state) {

Please add a lockdep_assert_held() statement that checks whether calls of this function are serialized properly.

+/**
+ * rdma_write_sg() - response on successful READ request
+ */
+static int rdma_write_sg(struct ibtrs_srv_op *id)
+{
+       struct ibtrs_srv_sess *sess = to_srv_sess(id->con->c.sess);
+       dma_addr_t dma_addr = sess->dma_addr[id->msg_id];
+       struct ibtrs_srv *srv = sess->srv;
+       struct ib_send_wr inv_wr, imm_wr;
+       struct ib_rdma_wr *wr = NULL;
+       const struct ib_send_wr *bad_wr;
+       enum ib_send_flags flags;
+       size_t sg_cnt;
+       int err, i, offset;
+       bool need_inval;
+       u32 rkey = 0;
+
+       sg_cnt = le16_to_cpu(id->rd_msg->sg_cnt);
+       need_inval = le16_to_cpu(id->rd_msg->flags) & IBTRS_MSG_NEED_INVAL_F;
+       if (unlikely(!sg_cnt))
+               return -EINVAL;
+
+       offset = 0;
+       for (i = 0; i < sg_cnt; i++) {
+               struct ib_sge *list;
+
+               wr              = &id->tx_wr[i];
+               list            = &id->tx_sg[i];
+               list->addr   = dma_addr + offset;
+               list->length = le32_to_cpu(id->rd_msg->desc[i].len);
+
+               /* WR will fail with length error
+                * if this is 0
+                */
+               if (unlikely(list->length == 0)) {
+                       ibtrs_err(sess, "Invalid RDMA-Write sg list length 0\n");
+                       return -EINVAL;
+               }
+
+               list->lkey = sess->s.dev->ib_pd->local_dma_lkey;
+               offset += list->length;
+
+               wr->wr.wr_cqe        = &io_comp_cqe;
+               wr->wr.sg_list       = list;
+               wr->wr.num_sge       = 1;
+               wr->remote_addr      = le64_to_cpu(id->rd_msg->desc[i].addr);
+               wr->rkey     = le32_to_cpu(id->rd_msg->desc[i].key);
+               if (rkey == 0)
+                       rkey = wr->rkey;
+               else
+                       /* Only one key is actually used */
+                       WARN_ON_ONCE(rkey != wr->rkey);
+
+               if (i < (sg_cnt - 1))
+                       wr->wr.next = &id->tx_wr[i + 1].wr;
+               else if (need_inval)
+                       wr->wr.next = &inv_wr;
+               else
+                       wr->wr.next = &imm_wr;
+
+               wr->wr.opcode = IB_WR_RDMA_WRITE;
+               wr->wr.ex.imm_data = 0;
+               wr->wr.send_flags  = 0;
+       }
+       /*
+        * From time to time we have to post signalled sends,
+        * or send queue will fill up and only QP reset can help.
+        */
+       flags = atomic_inc_return(&id->con->wr_cnt) % srv->queue_depth ?
+                       0 : IB_SEND_SIGNALED;
+
+       if (need_inval) {
+               inv_wr.next = &imm_wr;
+               inv_wr.wr_cqe = &io_comp_cqe;
+               inv_wr.sg_list = NULL;
+               inv_wr.num_sge = 0;
+               inv_wr.opcode = IB_WR_SEND_WITH_INV;
+               inv_wr.send_flags = 0;
+               inv_wr.ex.invalidate_rkey = rkey;
+       }
+       imm_wr.next = NULL;
+       imm_wr.wr_cqe = &io_comp_cqe;
+       imm_wr.sg_list = NULL;
+       imm_wr.num_sge = 0;
+       imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM;
+       imm_wr.send_flags = flags;
+       imm_wr.ex.imm_data = cpu_to_be32(ibtrs_to_io_rsp_imm(id->msg_id,
+                                                            0, need_inval));
+
+       ib_dma_sync_single_for_device(sess->s.dev->ib_dev, dma_addr,
+                                     offset, DMA_BIDIRECTIONAL);
+
+       err = ib_post_send(id->con->c.qp, &id->tx_wr[0].wr, &bad_wr);
+       if (unlikely(err))
+               ibtrs_err(sess,
+                         "Posting RDMA-Write-Request to QP failed, err: %d\n",
+                         err);
+
+       return err;
+}

All other RDMA server implementations use rdma_rw_ctx_init() and rdma_rw_ctx_wrs(). Please use these functions in IBTRS too.

+static void ibtrs_srv_hb_err_handler(struct ibtrs_con *c, int err)
+{
+       (void)err;
+       close_sess(to_srv_sess(c->sess));
+}

Is the (void)err statement really necessary?

+static int ibtrs_srv_rdma_init(struct ibtrs_srv_ctx *ctx, unsigned int port)
+{
+       struct sockaddr_in6 sin = {
+               .sin6_family    = AF_INET6,
+               .sin6_addr      = IN6ADDR_ANY_INIT,
+               .sin6_port      = htons(port),
+       };
+       struct sockaddr_ib sib = {
+               .sib_family                     = AF_IB,
+               .sib_addr.sib_subnet_prefix     = 0ULL,
+               .sib_addr.sib_interface_id      = 0ULL,
+               .sib_sid        = cpu_to_be64(RDMA_IB_IP_PS_IB | port),
+               .sib_sid_mask   = cpu_to_be64(0xffffffffffffffffULL),
+               .sib_pkey       = cpu_to_be16(0xffff),
+       };
+       struct rdma_cm_id *cm_ip, *cm_ib;
+       int ret;
+
+       /*
+        * We accept both IPoIB and IB connections, so we need to keep
+        * two cm id's, one for each socket type and port space.
+        * If the cm initialization of one of the id's fails, we abort
+        * everything.
+        */
+       cm_ip = ibtrs_srv_cm_init(ctx, (struct sockaddr *)&sin, RDMA_PS_TCP);
+       if (unlikely(IS_ERR(cm_ip)))
+               return PTR_ERR(cm_ip);
+
+       cm_ib = ibtrs_srv_cm_init(ctx, (struct sockaddr *)&sib, RDMA_PS_IB);
+       if (unlikely(IS_ERR(cm_ib))) {
+               ret = PTR_ERR(cm_ib);
+               goto free_cm_ip;
+       }
+
+       ctx->cm_id_ip = cm_ip;
+       ctx->cm_id_ib = cm_ib;
+
+       return 0;
+
+free_cm_ip:
+       rdma_destroy_id(cm_ip);
+
+       return ret;
+}

Will the above work if CONFIG_IPV6=n?

+static int __init ibtrs_server_init(void)
+{
+       int err;
+
+       if (!strlen(cq_affinity_list))
+               init_cq_affinity();

Is the above if-test useful? Can that if-test be left out?

Thanks,

Bart.

Reply via email to