On Fri, Aug 03, 2018 at 03:04:51PM +0800, Jason Wang wrote:
> We use to have message like:
> 
> struct vhost_msg {
>       int type;
>       union {
>               struct vhost_iotlb_msg iotlb;
>               __u8 padding[64];
>       };
> };
> 
> Unfortunately, there will be a hole of 32bit in 64bit machine because
> of the alignment. This leads a different formats between 32bit API and
> 64bit API. What's more it will break 32bit program running on 64bit
> machine.
> 
> So fixing this by introducing a new message type with an explicit
> 32bit reserved field after type like:
> 
> struct vhost_msg_v2 {
>       int type;
>       __u32 reserved;
>       union {
>               struct vhost_iotlb_msg iotlb;
>               __u8 padding[64];
>       };
> };
> 
> We will have a consistent ABI after switching to use this. To enable
> this capability, introduce a new ioctl (VHOST_SET_BAKCEND_FEATURE) for
> userspace to enable this feature (VHOST_BACKEND_F_IOTLB_V2).
> 
> Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
> Signed-off-by: Jason Wang <jasow...@redhat.com>
> ---
>  drivers/vhost/net.c        | 30 ++++++++++++++++++++
>  drivers/vhost/vhost.c      | 71 
> ++++++++++++++++++++++++++++++++++------------
>  drivers/vhost/vhost.h      | 11 ++++++-
>  include/uapi/linux/vhost.h | 18 ++++++++++++
>  4 files changed, 111 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 367d802..4e656f8 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -78,6 +78,10 @@ enum {
>  };
>  
>  enum {
> +     VHOST_NET_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
> +};
> +
> +enum {
>       VHOST_NET_VQ_RX = 0,
>       VHOST_NET_VQ_TX = 1,
>       VHOST_NET_VQ_MAX = 2,
> @@ -1399,6 +1403,21 @@ static long vhost_net_reset_owner(struct vhost_net *n)
>       return err;
>  }
>  
> +static int vhost_net_set_backend_features(struct vhost_net *n, u64 features)
> +{
> +     int i;
> +
> +     mutex_lock(&n->dev.mutex);
> +     for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
> +             mutex_lock(&n->vqs[i].vq.mutex);
> +             n->vqs[i].vq.acked_backend_features = features;
> +             mutex_unlock(&n->vqs[i].vq.mutex);
> +     }
> +     mutex_unlock(&n->dev.mutex);
> +
> +     return 0;
> +}
> +
>  static int vhost_net_set_features(struct vhost_net *n, u64 features)
>  {
>       size_t vhost_hlen, sock_hlen, hdr_len;
> @@ -1489,6 +1508,17 @@ static long vhost_net_ioctl(struct file *f, unsigned 
> int ioctl,
>               if (features & ~VHOST_NET_FEATURES)
>                       return -EOPNOTSUPP;
>               return vhost_net_set_features(n, features);
> +     case VHOST_GET_BACKEND_FEATURES:
> +             features = VHOST_NET_BACKEND_FEATURES;
> +             if (copy_to_user(featurep, &features, sizeof(features)))
> +                     return -EFAULT;
> +             return 0;
> +     case VHOST_SET_BACKEND_FEATURES:
> +             if (copy_from_user(&features, featurep, sizeof(features)))
> +                     return -EFAULT;
> +             if (features & ~VHOST_NET_BACKEND_FEATURES)
> +                     return -EOPNOTSUPP;
> +             return vhost_net_set_backend_features(n, features);
>       case VHOST_RESET_OWNER:
>               return vhost_net_reset_owner(n);
>       case VHOST_SET_OWNER:
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index a502f1a..6f6c42d 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -315,6 +315,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>       vq->log_addr = -1ull;
>       vq->private_data = NULL;
>       vq->acked_features = 0;
> +     vq->acked_backend_features = 0;
>       vq->log_base = NULL;
>       vq->error_ctx = NULL;
>       vq->kick = NULL;
> @@ -1027,28 +1028,40 @@ static int vhost_process_iotlb_msg(struct vhost_dev 
> *dev,
>  ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
>                            struct iov_iter *from)
>  {
> -     struct vhost_msg_node node;
> -     unsigned size = sizeof(struct vhost_msg);
> -     size_t ret;
> -     int err;
> +     struct vhost_iotlb_msg msg;
> +     size_t offset;
> +     int type, ret;
>  
> -     if (iov_iter_count(from) < size)
> -             return 0;
> -     ret = copy_from_iter(&node.msg, size, from);
> -     if (ret != size)
> +     ret = copy_from_iter(&type, sizeof(type), from);
> +     if (ret != sizeof(type))
>               goto done;
>  
> -     switch (node.msg.type) {
> +     switch (type) {
>       case VHOST_IOTLB_MSG:
> -             err = vhost_process_iotlb_msg(dev, &node.msg.iotlb);
> -             if (err)
> -                     ret = err;
> +             /* There maybe a hole after type for V1 message type,
> +              * so skip it here.
> +              */
> +             offset = offsetof(struct vhost_msg, iotlb) - sizeof(int);
> +             break;
> +     case VHOST_IOTLB_MSG_V2:
> +             offset = sizeof(__u32);
>               break;
>       default:
>               ret = -EINVAL;
> -             break;
> +             goto done;
> +     }
> +
> +     iov_iter_advance(from, offset);
> +     ret = copy_from_iter(&msg, sizeof(msg), from);
> +     if (ret != sizeof(msg))
> +             goto done;
> +     if (vhost_process_iotlb_msg(dev, &msg)) {
> +             ret = -EFAULT;
> +             goto done;
>       }
>  
> +     ret = (type == VHOST_IOTLB_MSG) ? sizeof(struct vhost_msg) :
> +           sizeof(struct vhost_msg_v2);
>  done:
>       return ret;
>  }

We can actually fix 32 bit apps too, checking the mode for v1.
But that can wait for another patch.

> @@ -1107,13 +1120,28 @@ ssize_t vhost_chr_read_iter(struct vhost_dev *dev, 
> struct iov_iter *to,
>               finish_wait(&dev->wait, &wait);
>  
>       if (node) {
> -             ret = copy_to_iter(&node->msg, size, to);
> +             struct vhost_iotlb_msg *msg;
> +             void *start = &node->msg;
> +
> +             switch (node->msg.type) {
> +             case VHOST_IOTLB_MSG:
> +                     size = sizeof(node->msg);
> +                     msg = &node->msg.iotlb;
> +                     break;
> +             case VHOST_IOTLB_MSG_V2:
> +                     size = sizeof(node->msg_v2);
> +                     msg = &node->msg_v2.iotlb;
> +                     break;
> +             default:
> +                     BUG();
> +                     break;
> +             }
>  
> -             if (ret != size || node->msg.type != VHOST_IOTLB_MISS) {
> +             ret = copy_to_iter(start, size, to);
> +             if (ret != size || msg->type != VHOST_IOTLB_MISS) {
>                       kfree(node);
>                       return ret;
>               }
> -
>               vhost_enqueue_msg(dev, &dev->pending_list, node);
>       }
>  
> @@ -1126,12 +1154,19 @@ static int vhost_iotlb_miss(struct vhost_virtqueue 
> *vq, u64 iova, int access)
>       struct vhost_dev *dev = vq->dev;
>       struct vhost_msg_node *node;
>       struct vhost_iotlb_msg *msg;
> +     bool v2 = vhost_backend_has_feature(vq, VHOST_BACKEND_F_IOTLB_MSG_V2);
>  
> -     node = vhost_new_msg(vq, VHOST_IOTLB_MISS);
> +     node = vhost_new_msg(vq, v2 ? VHOST_IOTLB_MSG_V2 : VHOST_IOTLB_MSG);
>       if (!node)
>               return -ENOMEM;
>  
> -     msg = &node->msg.iotlb;
> +     if (v2) {
> +             node->msg_v2.type = VHOST_IOTLB_MSG_V2;
> +             msg = &node->msg_v2.iotlb;
> +     } else {
> +             msg = &node->msg.iotlb;
> +     }
> +
>       msg->type = VHOST_IOTLB_MISS;
>       msg->iova = iova;
>       msg->perm = access;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 6c844b9..466ef75 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -132,6 +132,7 @@ struct vhost_virtqueue {
>       struct vhost_umem *iotlb;
>       void *private_data;
>       u64 acked_features;
> +     u64 acked_backend_features;
>       /* Log write descriptors */
>       void __user *log_base;
>       struct vhost_log *log;
> @@ -147,7 +148,10 @@ struct vhost_virtqueue {
>  };
>  
>  struct vhost_msg_node {
> -  struct vhost_msg msg;
> +  union {
> +       struct vhost_msg msg;
> +       struct vhost_msg_v2 msg_v2;
> +  };
>    struct vhost_virtqueue *vq;
>    struct list_head node;
>  };
> @@ -238,6 +242,11 @@ static inline bool vhost_has_feature(struct 
> vhost_virtqueue *vq, int bit)
>       return vq->acked_features & (1ULL << bit);
>  }
>  
> +static inline bool vhost_backend_has_feature(struct vhost_virtqueue *vq, int 
> bit)
> +{
> +     return vq->acked_backend_features & (1ULL << bit);
> +}
> +
>  #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
>  static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq)
>  {
> diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
> index c51f8e5..c176e9d 100644
> --- a/include/uapi/linux/vhost.h
> +++ b/include/uapi/linux/vhost.h
> @@ -65,6 +65,7 @@ struct vhost_iotlb_msg {
>  };
>  
>  #define VHOST_IOTLB_MSG 0x1
> +#define VHOST_IOTLB_MSG_V2 0x2
>  
>  struct vhost_msg {
>       int type;
> @@ -74,6 +75,15 @@ struct vhost_msg {
>       };
>  };
>  
> +struct vhost_msg_v2 {
> +     int type;
> +     __u32 reserved;
> +     union {
> +             struct vhost_iotlb_msg iotlb;
> +             __u8 padding[64];
> +     };
> +};
> +
>  struct vhost_memory_region {
>       __u64 guest_phys_addr;
>       __u64 memory_size; /* bytes */
> @@ -160,6 +170,14 @@ struct vhost_memory {
>  #define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24,    \
>                                        struct vhost_vring_state)
>  
> +/* Set or get vhost backend capability */
> +
> +/* Use message type V2 */
> +#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
> +
> +#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
> +#define VHOST_GET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x26, __u64)
> +
>  /* VHOST_NET specific defines */
>  
>  /* Attach virtio net ring to a raw socket, or tap device.

Acked-by: Michael S. Tsirkin <m...@redhat.com>

> -- 
> 2.7.4
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Reply via email to