> -----Original Message-----
> From: Pei, Andy <andy....@intel.com>
> Sent: Thursday, April 21, 2022 4:34 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo....@intel.com>; maxime.coque...@redhat.com; Cao,
> Gang <gang....@intel.com>; Liu, Changpeng <changpeng....@intel.com>
> Subject: [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration
> 
> Add SW live-migration support to block device.
> Add dirty page logging to block device.

Suggest rewording the commit message: "Add SW live-migration support, including dirty page logging, for block devices."

> 
> Signed-off-by: Andy Pei <andy....@intel.com>
> ---
>  drivers/vdpa/ifc/base/ifcvf.c |   4 +-
>  drivers/vdpa/ifc/base/ifcvf.h |   6 ++
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++--
> -----
>  3 files changed, 115 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
> index d10c1fd..e417c50 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.c
> +++ b/drivers/vdpa/ifc/base/ifcvf.c
> @@ -191,7 +191,7 @@
>       IFCVF_WRITE_REG32(val >> 32, hi);
>  }
> 
> -STATIC int
> +int
>  ifcvf_hw_enable(struct ifcvf_hw *hw)
>  {
>       struct ifcvf_pci_common_cfg *cfg;
> @@ -240,7 +240,7 @@
>       return 0;
>  }
> 
> -STATIC void
> +void
>  ifcvf_hw_disable(struct ifcvf_hw *hw)
>  {
>       u32 i;
> diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
> index 769c603..6dd7925 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.h
> +++ b/drivers/vdpa/ifc/base/ifcvf.h
> @@ -179,4 +179,10 @@ struct ifcvf_hw {
>  u64
>  ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
> 
> +int
> +ifcvf_hw_enable(struct ifcvf_hw *hw);
> +
> +void
> +ifcvf_hw_disable(struct ifcvf_hw *hw);
> +
>  #endif /* _IFCVF_H_ */
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 8d104b7..a23dc2d 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -345,6 +345,56 @@ struct rte_vdpa_dev_info {
>       }
>  }
> 
> +static void
> +vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
> +{
> +     struct ifcvf_hw *hw = &internal->hw;
> +     struct rte_vhost_vring vq;
> +     int i, vid;
> +     uint64_t features = 0;
> +     uint64_t log_base = 0, log_size = 0;
> +     uint64_t len;
> +
> +     vid = internal->vid;
> +
> +     if (internal->device_type == IFCVF_BLK) {
> +             for (i = 0; i < hw->nr_vring; i++) {
> +                     rte_vhost_get_vhost_vring(internal->vid, i, &vq);
> +                     while (vq.avail->idx != vq.used->idx) {
> +                             ifcvf_notify_queue(hw, i);
> +                             usleep(10);
> +                     }
> +                     hw->vring[i].last_avail_idx = vq.avail->idx;
> +                     hw->vring[i].last_used_idx = vq.used->idx;
> +             }
> +     }
> +
> +     ifcvf_hw_disable(hw);
> +
> +     for (i = 0; i < hw->nr_vring; i++)
> +             rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
> +                             hw->vring[i].last_used_idx);
> +
> +     if (internal->sw_lm)
> +             return;
> +
> +     rte_vhost_get_negotiated_features(vid, &features);
> +     if (RTE_VHOST_NEED_LOG(features)) {
> +             ifcvf_disable_logging(hw);
> +             rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
> +             rte_vfio_container_dma_unmap(internal->vfio_container_fd,
> +                             log_base, IFCVF_LOG_BASE, log_size);
> +             /*
> +              * IFCVF marks dirty memory pages for only packet buffer,
> +              * SW helps to mark the used ring as dirty after device stops.
> +              */
> +             for (i = 0; i < hw->nr_vring; i++) {
> +                     len = IFCVF_USED_RING_LEN(hw->vring[i].size);
> +                     rte_vhost_log_used_vring(vid, i, 0, len);
> +             }
> +     }
> +}

Could we combine vdpa_ifcvf_blk_pause and vdpa_ifcvf_stop into a single function
that checks the device type internally and handles the differences there? As far
as I can see, most of the logic is the same.

> +
>  #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
>               sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1))
>  static int
> @@ -659,15 +709,22 @@ struct rte_vdpa_dev_info {
>               }
>               hw->vring[i].avail = gpa;
> 
> -             /* Direct I/O for Tx queue, relay for Rx queue */
> -             if (i & 1) {
> -                     gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
> -                     if (gpa == 0) {
> -                             DRV_LOG(ERR, "Fail to get GPA for used ring.");
> -                             return -1;
> +             if (internal->device_type == IFCVF_NET) {
> +                     /* Direct I/O for Tx queue, relay for Rx queue */
> +                     if (i & 1) {
> +                             gpa = hva_to_gpa(vid,
> (uint64_t)(uintptr_t)vq.used);
> +                             if (gpa == 0) {
> +                                     DRV_LOG(ERR, "Fail to get GPA for used
> ring.");
> +                                     return -1;
> +                             }
> +                             hw->vring[i].used = gpa;
> +                     } else {
> +                             hw->vring[i].used = m_vring_iova +
> +                                     (char *)internal->m_vring[i].used -
> +                                     (char *)internal->m_vring[i].desc;
>                       }
> -                     hw->vring[i].used = gpa;
> -             } else {
> +             } else if (internal->device_type == IFCVF_BLK) {
> +                     /* BLK: relay every queue */
>                       hw->vring[i].used = m_vring_iova +
>                               (char *)internal->m_vring[i].used -
>                               (char *)internal->m_vring[i].desc;
> @@ -686,7 +743,10 @@ struct rte_vdpa_dev_info {
>       }
>       hw->nr_vring = nr_vring;
> 
> -     return ifcvf_start_hw(&internal->hw);
> +     if (internal->device_type == IFCVF_NET)
> +             return ifcvf_start_hw(&internal->hw);
> +     else if (internal->device_type == IFCVF_BLK)
> +             return ifcvf_hw_enable(&internal->hw);
> 
>  error:
>       for (i = 0; i < nr_vring; i++)
> @@ -710,8 +770,12 @@ struct rte_vdpa_dev_info {
> 
>       for (i = 0; i < hw->nr_vring; i++) {
>               /* synchronize remaining new used entries if any */
> -             if ((i & 1) == 0)
> +             if (internal->device_type == IFCVF_NET) {
> +                     if ((i & 1) == 0)
> +                             update_used_ring(internal, i);
> +             } else if (internal->device_type == IFCVF_BLK) {
>                       update_used_ring(internal, i);
> +             }
> 
>               rte_vhost_get_vhost_vring(vid, i, &vq);
>               len = IFCVF_USED_RING_LEN(vq.size);
> @@ -773,17 +837,36 @@ struct rte_vdpa_dev_info {
>               }
>       }
> 
> -     for (qid = 0; qid < q_num; qid += 2) {
> -             ev.events = EPOLLIN | EPOLLPRI;
> -             /* leave a flag to mark it's for interrupt */
> -             ev.data.u64 = 1 | qid << 1 |
> -                     (uint64_t)internal->intr_fd[qid] << 32;
> -             if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
> -                             < 0) {
> -                     DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
> -                     return NULL;
> +     if (internal->device_type == IFCVF_NET) {
> +             for (qid = 0; qid < q_num; qid += 2) {
> +                     ev.events = EPOLLIN | EPOLLPRI;
> +                     /* leave a flag to mark it's for interrupt */
> +                     ev.data.u64 = 1 | qid << 1 |
> +                             (uint64_t)internal->intr_fd[qid] << 32;
> +                     if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> +                                   internal->intr_fd[qid], &ev)
> +                                     < 0) {
> +                             DRV_LOG(ERR, "epoll add error: %s",
> +                                     strerror(errno));
> +                             return NULL;
> +                     }
> +                     update_used_ring(internal, qid);
> +             }
> +     } else if (internal->device_type == IFCVF_BLK) {
> +             for (qid = 0; qid < q_num; qid += 1) {
> +                     ev.events = EPOLLIN | EPOLLPRI;
> +                     /* leave a flag to mark it's for interrupt */
> +                     ev.data.u64 = 1 | qid << 1 |
> +                             (uint64_t)internal->intr_fd[qid] << 32;
> +                     if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> +                                   internal->intr_fd[qid], &ev)
> +                                     < 0) {
> +                             DRV_LOG(ERR, "epoll add error: %s",
> +                                     strerror(errno));
> +                             return NULL;
> +                     }
> +                     update_used_ring(internal, qid);

It seems we can also reduce the duplicated code in the case above: the two epoll
loops differ only in the queue-id step. And for the other device-type checks, if
a single combined condition suffices, I would prefer using just one.

Thanks,
Chenbo

>               }
> -             update_used_ring(internal, qid);
>       }
> 
>       /* start relay with a first kick */
> @@ -871,7 +954,10 @@ struct rte_vdpa_dev_info {
> 
>       /* stop the direct IO data path */
>       unset_notify_relay(internal);
> -     vdpa_ifcvf_stop(internal);
> +     if (internal->device_type == IFCVF_NET)
> +             vdpa_ifcvf_stop(internal);
> +     else if (internal->device_type == IFCVF_BLK)
> +             vdpa_ifcvf_blk_pause(internal);
>       vdpa_disable_vfio_intr(internal);
> 
>       ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
> --
> 1.8.3.1

Reply via email to