On Wed, Aug 17, 2022 at 05:01:00AM +0000, Arseniy Krasnov wrote:
> On 16.08.2022 05:32, Bobby Eshleman wrote:
> > CC'ing virtio-dev@lists.oasis-open.org
> > 
> > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
> >> This patch supports dgram in virtio and on the vhost side.
> Hello,
> 
> sorry, i don't understand, how this maintains message boundaries? Or it
> is unnecessary for SOCK_DGRAM?
> 
> Thanks

If I understand your question correctly: the payload length is included in
the packet header, so a receiver always knows where a message ends. The
message boundary is at header start + header length + payload length.

> >>
> >> Signed-off-by: Jiang Wang <jiang.w...@bytedance.com>
> >> Signed-off-by: Bobby Eshleman <bobby.eshle...@bytedance.com>
> >> ---
> >>  drivers/vhost/vsock.c                   |   2 +-
> >>  include/net/af_vsock.h                  |   2 +
> >>  include/uapi/linux/virtio_vsock.h       |   1 +
> >>  net/vmw_vsock/af_vsock.c                |  26 +++-
> >>  net/vmw_vsock/virtio_transport.c        |   2 +-
> >>  net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++--
> >>  6 files changed, 186 insertions(+), 20 deletions(-)
> >>
> >> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> >> index a5d1bdb786fe..3dc72a5647ca 100644
> >> --- a/drivers/vhost/vsock.c
> >> +++ b/drivers/vhost/vsock.c
> >> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
> >>    int ret;
> >>  
> >>    ret = vsock_core_register(&vhost_transport.transport,
> >> -                            VSOCK_TRANSPORT_F_H2G);
> >> +                            VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM);
> >>    if (ret < 0)
> >>            return ret;
> >>  
> >> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> >> index 1c53c4c4d88f..37e55c81e4df 100644
> >> --- a/include/net/af_vsock.h
> >> +++ b/include/net/af_vsock.h
> >> @@ -78,6 +78,8 @@ struct vsock_sock {
> >>  s64 vsock_stream_has_data(struct vsock_sock *vsk);
> >>  s64 vsock_stream_has_space(struct vsock_sock *vsk);
> >>  struct sock *vsock_create_connected(struct sock *parent);
> >> +int vsock_bind_stream(struct vsock_sock *vsk,
> >> +                struct sockaddr_vm *addr);
> >>  
> >>  /**** TRANSPORT ****/
> >>  
> >> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
> >> index 857df3a3a70d..0975b9c88292 100644
> >> --- a/include/uapi/linux/virtio_vsock.h
> >> +++ b/include/uapi/linux/virtio_vsock.h
> >> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
> >>  enum virtio_vsock_type {
> >>    VIRTIO_VSOCK_TYPE_STREAM = 1,
> >>    VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> >> +  VIRTIO_VSOCK_TYPE_DGRAM = 3,
> >>  };
> >>  
> >>  enum virtio_vsock_op {
> >> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> >> index 1893f8aafa48..87e4ae1866d3 100644
> >> --- a/net/vmw_vsock/af_vsock.c
> >> +++ b/net/vmw_vsock/af_vsock.c
> >> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
> >>    return 0;
> >>  }
> >>  
> >> +int vsock_bind_stream(struct vsock_sock *vsk,
> >> +                struct sockaddr_vm *addr)
> >> +{
> >> +  int retval;
> >> +
> >> +  spin_lock_bh(&vsock_table_lock);
> >> +  retval = __vsock_bind_connectible(vsk, addr);
> >> +  spin_unlock_bh(&vsock_table_lock);
> >> +
> >> +  return retval;
> >> +}
> >> +EXPORT_SYMBOL(vsock_bind_stream);
> >> +
> >>  static int __vsock_bind_dgram(struct vsock_sock *vsk,
> >>                          struct sockaddr_vm *addr)
> >>  {
> >> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features)
> >>    }
> >>  
> >>    if (features & VSOCK_TRANSPORT_F_DGRAM) {
> >> -          if (t_dgram) {
> >> -                  err = -EBUSY;
> >> -                  goto err_busy;
> >> +          /* TODO: always chose the G2H variant over others, support nesting later */
> >> +          if (features & VSOCK_TRANSPORT_F_G2H) {
> >> +                  if (t_dgram)
> >> +                          pr_warn("virtio_vsock: t_dgram already set\n");
> >> +                  t_dgram = t;
> >> +          }
> >> +
> >> +          if (!t_dgram) {
> >> +                  t_dgram = t;
> >>            }
> >> -          t_dgram = t;
> >>    }
> >>  
> >>    if (features & VSOCK_TRANSPORT_F_LOCAL) {
> >> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> >> index 073314312683..d4526ca462d2 100644
> >> --- a/net/vmw_vsock/virtio_transport.c
> >> +++ b/net/vmw_vsock/virtio_transport.c
> >> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
> >>            return -ENOMEM;
> >>  
> >>    ret = vsock_core_register(&virtio_transport.transport,
> >> -                            VSOCK_TRANSPORT_F_G2H);
> >> +                            VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM);
> >>    if (ret)
> >>            goto out_wq;
> >>  
> >> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> >> index bdf16fff054f..aedb48728677 100644
> >> --- a/net/vmw_vsock/virtio_transport_common.c
> >> +++ b/net/vmw_vsock/virtio_transport_common.c
> >> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
> >>  
> >>  static u16 virtio_transport_get_type(struct sock *sk)
> >>  {
> >> -  if (sk->sk_type == SOCK_STREAM)
> >> +  if (sk->sk_type == SOCK_DGRAM)
> >> +          return VIRTIO_VSOCK_TYPE_DGRAM;
> >> +  else if (sk->sk_type == SOCK_STREAM)
> >>            return VIRTIO_VSOCK_TYPE_STREAM;
> >>    else
> >>            return VIRTIO_VSOCK_TYPE_SEQPACKET;
> >> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> >>    vvs = vsk->trans;
> >>  
> >>    /* we can send less than pkt_len bytes */
> >> -  if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> >> -          pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> >> +  if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
> >> +          if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> >> +                  pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> >> +          else
> >> +                  return 0;
> >> +  }
> >>  
> >> -  /* virtio_transport_get_credit might return less than pkt_len credit */
> >> -  pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> >> +  if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
> >> +          /* virtio_transport_get_credit might return less than pkt_len credit */
> >> +          pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> >>  
> >> -  /* Do not send zero length OP_RW pkt */
> >> -  if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> >> -          return pkt_len;
> >> +          /* Do not send zero length OP_RW pkt */
> >> +          if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> >> +                  return pkt_len;
> >> +  }
> >>  
> >>    skb = virtio_transport_alloc_skb(info, pkt_len,
> >>                                     src_cid, src_port,
> >>                                     dst_cid, dst_port,
> >>                                     &err);
> >>    if (!skb) {
> >> -          virtio_transport_put_credit(vvs, pkt_len);
> >> +          if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> >> +                  virtio_transport_put_credit(vvs, pkt_len);
> >>            return err;
> >>    }
> >>  
> >> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
> >>  }
> >>  EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
> >>  
> >> +static ssize_t
> >> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
> >> +                            struct msghdr *msg, size_t len)
> >> +{
> >> +  struct virtio_vsock_sock *vvs = vsk->trans;
> >> +  struct sk_buff *skb;
> >> +  size_t total = 0;
> >> +  u32 free_space;
> >> +  int err = -EFAULT;
> >> +
> >> +  spin_lock_bh(&vvs->rx_lock);
> >> +  if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
> >> +          skb = __skb_dequeue(&vvs->rx_queue);
> >> +
> >> +          total = len;
> >> +          if (total > skb->len - vsock_metadata(skb)->off)
> >> +                  total = skb->len - vsock_metadata(skb)->off;
> >> +          else if (total < skb->len - vsock_metadata(skb)->off)
> >> +                  msg->msg_flags |= MSG_TRUNC;
> >> +
> >> +          /* sk_lock is held by caller so no one else can dequeue.
> >> +           * Unlock rx_lock since memcpy_to_msg() may sleep.
> >> +           */
> >> +          spin_unlock_bh(&vvs->rx_lock);
> >> +
> >> +          err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
> >> +          if (err)
> >> +                  return err;
> >> +
> >> +          spin_lock_bh(&vvs->rx_lock);
> >> +
> >> +          virtio_transport_dec_rx_pkt(vvs, skb);
> >> +          consume_skb(skb);
> >> +  }
> >> +
> >> +  free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
> >> +
> >> +  spin_unlock_bh(&vvs->rx_lock);
> >> +
> >> +  if (total > 0 && msg->msg_name) {
> >> +          /* Provide the address of the sender. */
> >> +          DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
> >> +
> >> +          vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
> >> +                          le32_to_cpu(vsock_hdr(skb)->src_port));
> >> +          msg->msg_namelen = sizeof(*vm_addr);
> >> +  }
> >> +  return total;
> >> +}
> >> +
> >> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk)
> >> +{
> >> +  return virtio_transport_stream_has_data(vsk);
> >> +}
> >> +
> >>  int
> >>  virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
> >>                               struct msghdr *msg,
> >> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
> >>                           struct msghdr *msg,
> >>                           size_t len, int flags)
> >>  {
> >> -  return -EOPNOTSUPP;
> >> +  struct sock *sk;
> >> +  size_t err = 0;
> >> +  long timeout;
> >> +
> >> +  DEFINE_WAIT(wait);
> >> +
> >> +  sk = &vsk->sk;
> >> +  err = 0;
> >> +
> >> +  if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK)
> >> +          return -EOPNOTSUPP;
> >> +
> >> +  lock_sock(sk);
> >> +
> >> +  if (!len)
> >> +          goto out;
> >> +
> >> +  timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> >> +
> >> +  while (1) {
> >> +          s64 ready;
> >> +
> >> +          prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
> >> +          ready = virtio_transport_dgram_has_data(vsk);
> >> +
> >> +          if (ready == 0) {
> >> +                  if (timeout == 0) {
> >> +                          err = -EAGAIN;
> >> +                          finish_wait(sk_sleep(sk), &wait);
> >> +                          break;
> >> +                  }
> >> +
> >> +                  release_sock(sk);
> >> +                  timeout = schedule_timeout(timeout);
> >> +                  lock_sock(sk);
> >> +
> >> +                  if (signal_pending(current)) {
> >> +                          err = sock_intr_errno(timeout);
> >> +                          finish_wait(sk_sleep(sk), &wait);
> >> +                          break;
> >> +                  } else if (timeout == 0) {
> >> +                          err = -EAGAIN;
> >> +                          finish_wait(sk_sleep(sk), &wait);
> >> +                          break;
> >> +                  }
> >> +          } else {
> >> +                  finish_wait(sk_sleep(sk), &wait);
> >> +
> >> +                  if (ready < 0) {
> >> +                          err = -ENOMEM;
> >> +                          goto out;
> >> +                  }
> >> +
> >> +                  err = virtio_transport_dgram_do_dequeue(vsk, msg, len);
> >> +                  break;
> >> +          }
> >> +  }
> >> +out:
> >> +  release_sock(sk);
> >> +  return err;
> >>  }
> >>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
> >>  
> >> @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
> >>  int virtio_transport_dgram_bind(struct vsock_sock *vsk,
> >>                            struct sockaddr_vm *addr)
> >>  {
> >> -  return -EOPNOTSUPP;
> >> +  return vsock_bind_stream(vsk, addr);
> >>  }
> >>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
> >>  
> >>  bool virtio_transport_dgram_allow(u32 cid, u32 port)
> >>  {
> >> -  return false;
> >> +  return true;
> >>  }
> >>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
> >>  
> >> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
> >>                           struct msghdr *msg,
> >>                           size_t dgram_len)
> >>  {
> >> -  return -EOPNOTSUPP;
> >> +  struct virtio_vsock_pkt_info info = {
> >> +          .op = VIRTIO_VSOCK_OP_RW,
> >> +          .msg = msg,
> >> +          .pkt_len = dgram_len,
> >> +          .vsk = vsk,
> >> +          .remote_cid = remote_addr->svm_cid,
> >> +          .remote_port = remote_addr->svm_port,
> >> +  };
> >> +
> >> +  return virtio_transport_send_pkt_info(vsk, &info);
> >>  }
> >>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
> >>  
> >> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk,
> >>    struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
> >>    int err = 0;
> >>  
> >> +  if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
> >> +          virtio_transport_recv_enqueue(vsk, skb);
> >> +          sk->sk_data_ready(sk);
> >> +          return err;
> >> +  }
> >> +
> >>    switch (le16_to_cpu(hdr->op)) {
> >>    case VIRTIO_VSOCK_OP_RW:
> >>            virtio_transport_recv_enqueue(vsk, skb);
> >> @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
> >>  static bool virtio_transport_valid_type(u16 type)
> >>  {
> >>    return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> >> -         (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> >> +         (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> >> +         (type == VIRTIO_VSOCK_TYPE_DGRAM);
> >>  }
> >>  
> >>  /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
> >> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> >>            goto free_pkt;
> >>    }
> >>  
> >> +  if (sk->sk_type == SOCK_DGRAM) {
> >> +          virtio_transport_recv_connected(sk, skb);
> >> +          goto out;
> >> +  }
> >> +
> >>    space_available = virtio_transport_space_update(sk, skb);
> >>  
> >>    /* Update CID in case it has changed after a transport reset event */
> >> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> >>            break;
> >>    }
> >>  
> >> +out:
> >>    release_sock(sk);
> >>  
> >>    /* Release refcnt obtained when we fetched this socket out of the
> >> -- 
> >> 2.35.1
> >>
> > 
> > ---------------------------------------------------------------------
> > To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org
> > For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org
> > 
> 

---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org

Reply via email to