Add SW live-migration support to the block device. For a block device, it is critical that no request is dropped. So when the virtio-blk device is paused, make sure the hardware last_avail_idx and last_used_idx are equal. This indicates that all requests have received acks and there is no inflight IO.
Signed-off-by: Andy Pei <andy....@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 42 ++++++++++++++++++++++++++++++++++++++----
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 244de46..4fb1736 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 509a1ed..3e78c7d 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -316,8 +316,34 @@ struct rte_vdpa_dev_info {
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
+
+	/* to make sure no packet is lost for blk device
+	 * do not stop until last_avail_idx == last_used_idx
+	 */
+	if (internal->hw.device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
+		}
+	}
+
 	ifcvf_stop_hw(hw);
 
 	for (i = 0; i < hw->nr_vring; i++)
@@ -641,8 +667,10 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
+		/* NET: Direct I/O for Tx queue, relay for Rx queue
+		 * BLK: relay every queue
+		 */
+		if ((internal->hw.device_type == IFCVF_NET) && (i & 1)) {
 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
 			if (gpa == 0) {
 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
@@ -692,8 +720,12 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (internal->hw.device_type == IFCVF_NET) {
+			if ((i & 1) == 0)
+				update_used_ring(internal, i);
+		} else if (internal->hw.device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -755,7 +787,9 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
+	for (qid = 0; qid < q_num; qid += 1) {
+		if ((internal->hw.device_type == IFCVF_NET) && (qid & 1))
+			continue;
 		ev.events = EPOLLIN | EPOLLPRI;
 		/* leave a flag to mark it's for interrupt */
 		ev.data.u64 = 1 | qid << 1 |
-- 
1.8.3.1
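
For reference, below is a minimal standalone sketch of the drain-wait
technique the patch applies before ifcvf_stop_hw(): poll the per-queue
LM ring-state word (low 16 bits last_avail_idx, high 16 bits
last_used_idx) and kick the queue until the two indexes match. The
struct ring_ops callbacks and the drain_queue() helper are hypothetical
stand-ins for the ifcvf register read (hw->lm_cfg +
IFCVF_LM_RING_STATE_OFFSET + i * IFCVF_LM_CFG_SIZE) and
ifcvf_notify_queue(); only the register layout and the kick-and-usleep
loop come from the patch itself.

#include <stdint.h>
#include <unistd.h>

struct ring_ops {
	/* read the 32-bit LM ring-state word for queue qid */
	uint32_t (*read_ring_state)(void *hw, int qid);
	/* kick the device so it keeps consuming descriptors */
	void (*notify_queue)(void *hw, int qid);
};

/*
 * Block until the device has acked every outstanding request on
 * queue qid, i.e. until last_avail_idx == last_used_idx.
 */
static void
drain_queue(void *hw, int qid, const struct ring_ops *ops)
{
	uint16_t last_avail_idx, last_used_idx;
	uint32_t ring_state;

	do {
		ring_state = ops->read_ring_state(hw, qid);
		last_avail_idx = (uint16_t)(ring_state & 0xffff);
		last_used_idx  = (uint16_t)(ring_state >> 16);
		if (last_avail_idx != last_used_idx) {
			/* inflight IO remains: kick and poll again */
			ops->notify_queue(hw, qid);
			usleep(10);
		}
	} while (last_avail_idx != last_used_idx);
}

Note the loop has no timeout, matching the patch: once the guest stops
submitting new requests, the device is assumed to eventually ack all
inflight IO.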
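The other three hunks all encode the same queue-selection rule, which
can be summarized by the hypothetical predicate below (the
queue_is_relayed() name and enum vdpa_dev_type are illustrative, not
part of the driver): a NET device relays only even-numbered (Rx)
queues and leaves odd (Tx) queues on direct I/O, while a BLK device
relays every queue.

#include <stdbool.h>

enum vdpa_dev_type { DEV_NET, DEV_BLK };

static bool
queue_is_relayed(enum vdpa_dev_type type, int qid)
{
	if (type == DEV_NET)
		return (qid & 1) == 0;	/* Rx (even) queues only */
	return true;			/* BLK: all queues */
}

This is why the epoll loop now steps by 1 and skips NET odd queues
instead of stepping by 2: for BLK, every queue needs a relay interrupt
handler.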