Hi. Can you elaborate on why two CQs fix it? Does it depend on the HCA implementation?
I'm not against two CQs for sending and receiving. In fact, I'm for it,
because I use two CQs for postcopy RDMA support.

Thanks,

On Fri, Aug 30, 2013 at 08:39:31PM +0800, Frank Yang wrote:
> When several VMs migrate with RDMA at the same time, the increased pressure
> causes intermittent packet loss and makes the source and destination wait
> for each other. Some VMs may end up blocked during the migration.
>
> Fix the bug by using two completion queues, one for sending and one for
> receiving.
>
> From 0c4829495cdc89eea2e94b103ac42c3f6a4b32c2 Mon Sep 17 00:00:00 2001
> From: Frank Yang <frank.yang...@gmail.com>
> Date: Fri, 30 Aug 2013 17:53:34 +0800
> Subject: [PATCH] rdma: fix parallel migration of multiple VMs
>
> Signed-off-by: Frank Yang <frank.yang...@gmail.com>
> ---
>  migration-rdma.c | 57 ++++++++++++++++++++++++++++++++++++--------------------
>  1 file changed, 37 insertions(+), 20 deletions(-)
>
> diff --git a/migration-rdma.c b/migration-rdma.c
> index 3d1266f..d0eacbb 100644
> --- a/migration-rdma.c
> +++ b/migration-rdma.c
> @@ -362,7 +362,8 @@ typedef struct RDMAContext {
>      struct ibv_qp *qp;                      /* queue pair */
>      struct ibv_comp_channel *comp_channel;  /* completion channel */
>      struct ibv_pd *pd;                      /* protection domain */
> -    struct ibv_cq *cq;                      /* completion queue */
> +    struct ibv_cq *send_cq;                 /* send completion queue */
> +    struct ibv_cq *recv_cq;                 /* receive completion queue */
>
>      /*
>       * If a previous write failed (perhaps because of a failed
> @@ -1006,9 +1007,12 @@ static int qemu_rdma_alloc_pd_cq(RDMAContext *rdma)
>       * Completion queue can be filled by both read and write work requests,
>       * so must reflect the sum of both possible queue sizes.
>       */
> -    rdma->cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 3),
> +    rdma->send_cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 2),
>                               NULL, rdma->comp_channel, 0);
> -    if (!rdma->cq) {
> +    rdma->recv_cq = ibv_create_cq(rdma->verbs, RDMA_SIGNALED_SEND_MAX, NULL,
> +                                  rdma->comp_channel, 0);
> +
> +    if (!rdma->send_cq || !rdma->recv_cq) {
>          fprintf(stderr, "failed to allocate completion queue\n");
>          goto err_alloc_pd_cq;
>      }
> @@ -1040,8 +1044,8 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma)
>      attr.cap.max_recv_wr = 3;
>      attr.cap.max_send_sge = 1;
>      attr.cap.max_recv_sge = 1;
> -    attr.send_cq = rdma->cq;
> -    attr.recv_cq = rdma->cq;
> +    attr.send_cq = rdma->send_cq;
> +    attr.recv_cq = rdma->recv_cq;
>      attr.qp_type = IBV_QPT_RC;
>
>      ret = rdma_create_qp(rdma->cm_id, rdma->pd, &attr);
> @@ -1361,13 +1365,18 @@ static void qemu_rdma_signal_unregister(RDMAContext *rdma, uint64_t index,
>   * Return the work request ID that completed.
>   */
>  static uint64_t qemu_rdma_poll(RDMAContext *rdma, uint64_t *wr_id_out,
> -                               uint32_t *byte_len)
> +                               uint32_t *byte_len, int wrid_requested)
>  {
>      int ret;
>      struct ibv_wc wc;
>      uint64_t wr_id;
>
> -    ret = ibv_poll_cq(rdma->cq, 1, &wc);
> +    if (wrid_requested == RDMA_WRID_RDMA_WRITE ||
> +        wrid_requested == RDMA_WRID_SEND_CONTROL) {
> +        ret = ibv_poll_cq(rdma->send_cq, 1, &wc);
> +    } else if (wrid_requested >= RDMA_WRID_RECV_CONTROL) {
> +        ret = ibv_poll_cq(rdma->recv_cq, 1, &wc);
> +    }
>
>      if (!ret) {
>          *wr_id_out = RDMA_WRID_NONE;
> @@ -1460,12 +1469,9 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested,
>      void *cq_ctx;
>      uint64_t wr_id = RDMA_WRID_NONE, wr_id_in;
>
> -    if (ibv_req_notify_cq(rdma->cq, 0)) {
> -        return -1;
> -    }
>      /* poll cq first */
>      while (wr_id != wrid_requested) {
> -        ret = qemu_rdma_poll(rdma, &wr_id_in, byte_len);
> +        ret = qemu_rdma_poll(rdma, &wr_id_in, byte_len, wrid_requested);
>          if (ret < 0) {
>              return ret;
>          }
> @@ -1487,6 +1493,17 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested,
>      }
>
>      while (1) {
> +        if (wrid_requested == RDMA_WRID_RDMA_WRITE ||
> +            wrid_requested == RDMA_WRID_SEND_CONTROL) {
> +            if (ibv_req_notify_cq(rdma->send_cq, 0)) {
> +                return -1;
> +            }
> +        } else if (wrid_requested >= RDMA_WRID_RECV_CONTROL) {
> +            if (ibv_req_notify_cq(rdma->recv_cq, 0)) {
> +                return -1;
> +            }
> +        }
> +
>          /*
>           * Coroutine doesn't start until process_incoming_migration()
>           * so don't yield unless we know we're running inside of a coroutine.
> @@ -1502,12 +1519,8 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested,
>
>          num_cq_events++;
>
> -        if (ibv_req_notify_cq(cq, 0)) {
> -            goto err_block_for_wrid;
> -        }
> -
>          while (wr_id != wrid_requested) {
> -            ret = qemu_rdma_poll(rdma, &wr_id_in, byte_len);
> +            ret = qemu_rdma_poll(rdma, &wr_id_in, byte_len, wrid_requested);
>              if (ret < 0) {
>                  goto err_block_for_wrid;
>              }
> @@ -2236,9 +2249,13 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
>          ibv_destroy_qp(rdma->qp);
>          rdma->qp = NULL;
>      }
> -    if (rdma->cq) {
> -        ibv_destroy_cq(rdma->cq);
> -        rdma->cq = NULL;
> +    if (rdma->send_cq) {
> +        ibv_destroy_cq(rdma->send_cq);
> +        rdma->send_cq = NULL;
> +    }
> +    if (rdma->recv_cq) {
> +        ibv_destroy_cq(rdma->recv_cq);
> +        rdma->recv_cq = NULL;
>      }
>      if (rdma->comp_channel) {
>          ibv_destroy_comp_channel(rdma->comp_channel);
> @@ -2770,7 +2787,7 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
>       */
>      while (1) {
>          uint64_t wr_id, wr_id_in;
> -        int ret = qemu_rdma_poll(rdma, &wr_id_in, NULL);
> +        int ret = qemu_rdma_poll(rdma, &wr_id_in, NULL, RDMA_WRID_RDMA_WRITE);
>          if (ret < 0) {
>              fprintf(stderr, "rdma migration: polling error! %d\n", ret);
>              goto err;
> --
> 1.8.3.msysgit.0

--
yamahata
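
For reference, below is a minimal standalone sketch of the split-CQ setup the
patch adopts, written against libibverbs/librdmacm. It is an illustration, not
code from the patch: it assumes an already-connected struct rdma_cm_id,
SEND_MAX stands in for RDMA_SIGNALED_SEND_MAX, and alloc_split_cqs() is a
hypothetical helper name. Error handling is abbreviated (resources created
before a failure are not torn down here).

#include <stddef.h>
#include <infiniband/verbs.h>
#include <rdma/rdma_cma.h>

#define SEND_MAX 64  /* placeholder for RDMA_SIGNALED_SEND_MAX */

/*
 * Create separate send and receive completion queues and attach them to a
 * single reliable-connected QP, mirroring what the patch does in
 * qemu_rdma_alloc_pd_cq() and qemu_rdma_alloc_qp().
 */
static int alloc_split_cqs(struct rdma_cm_id *cm_id)
{
    struct ibv_pd *pd;
    struct ibv_comp_channel *ch;
    struct ibv_cq *send_cq, *recv_cq;
    struct ibv_qp_init_attr attr = { 0 };

    pd = ibv_alloc_pd(cm_id->verbs);
    if (!pd) {
        return -1;
    }

    /* One completion channel can serve both CQs; ibv_get_cq_event()
     * reports which CQ raised each event. */
    ch = ibv_create_comp_channel(cm_id->verbs);
    if (!ch) {
        return -1;
    }

    /* Size each CQ for the work requests that can actually complete on
     * it, rather than one shared CQ sized for the sum of both. */
    send_cq = ibv_create_cq(cm_id->verbs, SEND_MAX * 2, NULL, ch, 0);
    recv_cq = ibv_create_cq(cm_id->verbs, SEND_MAX, NULL, ch, 0);
    if (!send_cq || !recv_cq) {
        return -1;
    }

    attr.cap.max_send_wr = SEND_MAX;  /* assumed cap; the patch's hunk
                                       * only shows the recv/sge caps */
    attr.cap.max_recv_wr = 3;
    attr.cap.max_send_sge = 1;
    attr.cap.max_recv_sge = 1;
    attr.send_cq = send_cq;           /* send and RDMA-write completions */
    attr.recv_cq = recv_cq;           /* receive completions */
    attr.qp_type = IBV_QPT_RC;

    /* Returns 0 on success, -1 with errno set on failure. */
    return rdma_create_qp(cm_id, pd, &attr);
}

With this layout a burst of inbound completions can no longer crowd send
completions out of a shared queue (and vice versa), which is the behaviour
the patch description attributes to the blocked parallel migrations.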