Enable virtio-blk SW live migration: relay the callfd and log the dirty pages. In this version we ignore the write command and still mark its pages dirty.
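Because the device only logs the pages it writes for packet buffers, and the block write command is not parsed yet, the SW relay conservatively marks the whole used ring of every queue dirty once the device is paused. A minimal sketch of that step (it mirrors the loop added to the stop/pause paths below; vid, hw, i and len are taken from the surrounding driver context and are not a complete, standalone function):

	for (i = 0; i < hw->nr_vring; i++) {
		/* log the entire used ring as dirty; no per-command tracking yet */
		len = IFCVF_USED_RING_LEN(hw->vring[i].size);
		rte_vhost_log_used_vring(vid, i, 0, len);
	}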
Signed-off-by: Andy Pei <andy....@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 116 insertions(+), 22 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 721cb1d..3a69e53 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -189,7 +189,7 @@
 	IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
 	struct ifcvf_pci_common_cfg *cfg;
@@ -238,7 +238,7 @@
 	return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
 	u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 4f99bb3..a930825 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -332,10 +332,67 @@ struct rte_vdpa_dev_info {
 
 	rte_vhost_get_negotiated_features(vid, &features);
 	if (RTE_VHOST_NEED_LOG(features)) {
-		ifcvf_disable_logging(hw);
-		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
-		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
-				log_base, IFCVF_LOG_BASE, log_size);
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
+		/* IFCVF marks dirty memory pages for only packet buffer,
+		 * SW helps to mark the used ring as dirty after device stops.
+		 */
+		for (i = 0; i < hw->nr_vring; i++) {
+			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+			rte_vhost_log_used_vring(vid, i, 0, len);
+		}
+	}
+}
+
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
+	int i, vid;
+	uint64_t features = 0;
+	uint64_t log_base = 0, log_size = 0;
+	uint64_t len;
+
+	vid = internal->vid;
+
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
+	ifcvf_hw_disable(hw);
+
+	for (i = 0; i < hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+			hw->vring[i].last_used_idx);
+
+	if (internal->sw_lm)
+		return;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+	if (RTE_VHOST_NEED_LOG(features)) {
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
 		/*
 		 * IFCVF marks dirty memory pages for only packet buffer,
 		 * SW helps to mark the used ring as dirty after device stops.
@@ -661,15 +718,17 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
+		/* NETWORK: Direct I/O for Tx queue, relay for Rx queue
+		 * BLK: relay every queue
+		 */
+		if ((i & 1) && (internal->device_type == IFCVF_NET)) {
 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
 			if (gpa == 0) {
 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
 				return -1;
 			}
 			hw->vring[i].used = gpa;
-		} else {
+		} else if (internal->device_type == IFCVF_BLK) {
 			hw->vring[i].used = m_vring_iova +
 			(char *)internal->m_vring[i].used -
 			(char *)internal->m_vring[i].desc;
@@ -688,7 +747,10 @@ struct rte_vdpa_dev_info {
 	}
 	hw->nr_vring = nr_vring;
 
-	return ifcvf_start_hw(&internal->hw);
+	if (internal->device_type == IFCVF_NET)
+		return ifcvf_start_hw(&internal->hw);
+	else if (internal->device_type == IFCVF_BLK)
+		return ifcvf_hw_enable(&internal->hw);
 
 error:
 	for (i = 0; i < nr_vring; i++)
@@ -713,8 +775,10 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (((i & 1) == 0 && internal->device_type == IFCVF_NET) ||
+			internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -726,6 +790,8 @@ struct rte_vdpa_dev_info {
 			(uint64_t)(uintptr_t)internal->m_vring[i].desc,
 			m_vring_iova, size);
 
+		hw->vring[i].last_avail_idx = vq.used->idx;
+		hw->vring[i].last_used_idx = vq.used->idx;
 		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
 				hw->vring[i].last_used_idx);
 		rte_free(internal->m_vring[i].desc);
@@ -776,17 +842,36 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
-		ev.events = EPOLLIN | EPOLLPRI;
-		/* leave a flag to mark it's for interrupt */
-		ev.data.u64 = 1 | qid << 1 |
-			(uint64_t)internal->intr_fd[qid] << 32;
-		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
-				< 0) {
-			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
-			return NULL;
+	if (internal->device_type == IFCVF_NET) {
+		for (qid = 0; qid < q_num; qid += 2) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				internal->intr_fd[qid], &ev)
+				< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (qid = 0; qid < q_num; qid += 1) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				internal->intr_fd[qid], &ev)
+				< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
 		}
-		update_used_ring(internal, qid);
 	}
 
 	/* start relay with a first kick */
@@ -874,7 +959,10 @@ struct rte_vdpa_dev_info {
 
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
-	vdpa_ifcvf_stop(internal);
+	if (internal->device_type == IFCVF_NET)
+		vdpa_ifcvf_stop(internal);
+	else if (internal->device_type == IFCVF_BLK)
+		vdpa_ifcvf_blk_pause(internal);
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1