On Fri, Dec 20, 2013 at 03:10:37PM +0100, Mian M. Hamayun wrote: > From: Antonios Motakis <a.mota...@virtualopensystems.com> > > Add structures for passing vhost-user messages over a unix domain socket. > This is the equivalent to the existing vhost-kernel ioctls. > > Connect to the named unix domain socket. The system call sendmsg > is used for communication. To be able to pass file descriptors > between processes - we use SCM_RIGHTS type in the message control header. > > Signed-off-by: Antonios Motakis <a.mota...@virtualopensystems.com> > Signed-off-by: Nikolay Nikolaev <n.nikol...@virtualopensystems.com> > --- > hw/virtio/vhost-backend.c | 167 > ++++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 161 insertions(+), 6 deletions(-) > > diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c > index 847809f..96d3bf0 100644 > --- a/hw/virtio/vhost-backend.c > +++ b/hw/virtio/vhost-backend.c > @@ -14,30 +14,185 @@ > #include <fcntl.h> > #include <unistd.h> > #include <sys/ioctl.h> > +#include <sys/socket.h> > +#include <sys/un.h> > +#include <linux/vhost.h> > + > +#define VHOST_MEMORY_MAX_NREGIONS 8 > + > +typedef enum VhostUserRequest { > + VHOST_USER_NONE = 0, > + VHOST_USER_GET_FEATURES = 1, > + VHOST_USER_SET_FEATURES = 2, > + VHOST_USER_SET_OWNER = 3, > + VHOST_USER_RESET_OWNER = 4, > + VHOST_USER_SET_MEM_TABLE = 5, > + VHOST_USER_SET_LOG_BASE = 6, > + VHOST_USER_SET_LOG_FD = 7, > + VHOST_USER_SET_VRING_NUM = 8, > + VHOST_USER_SET_VRING_ADDR = 9, > + VHOST_USER_SET_VRING_BASE = 10, > + VHOST_USER_GET_VRING_BASE = 11, > + VHOST_USER_SET_VRING_KICK = 12, > + VHOST_USER_SET_VRING_CALL = 13, > + VHOST_USER_SET_VRING_ERR = 14, > + VHOST_USER_NET_SET_BACKEND = 15, > + VHOST_USER_MAX > +} VhostUserRequest; > + > +typedef struct VhostUserMemoryRegion { > + __u64 guest_phys_addr; > + __u64 memory_size; > + __u64 userspace_addr; > +} VhostUserMemoryRegion; > + > +typedef struct VhostUserMemory { > + __u32 nregions;
There will be padding here: not a good idea as it will be different depending on the compiler. Better add an explicit dummy 32 field here. > + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; > +} VhostUserMemory; > + > +typedef struct VhostUserMsg { > + VhostUserRequest request; > + > + int flags; Same thing here. > + union { > + uint64_t u64; > + int fd; > + struct vhost_vring_state state; > + struct vhost_vring_addr addr; > + struct vhost_vring_file file; > + > + VhostUserMemory memory; A union of fields of different sizes, this is likely to leak data from stack. > + }; > +} VhostUserMsg; > + > +static int vhost_user_recv(int fd, VhostUserMsg *msg) > +{ > + ssize_t r = read(fd, msg, sizeof(VhostUserMsg)); Can't this return EINTR? > + > + return (r == sizeof(VhostUserMsg)) ? 0 : -1; () not needed around == here. > +} > + > +static int vhost_user_send_fds(int fd, const VhostUserMsg *msg, int *fds, > + size_t fd_num) > +{ > + int r; > + > + struct msghdr msgh; > + struct iovec iov[1]; > + > + size_t fd_size = fd_num * sizeof(int); > + char control[CMSG_SPACE(fd_size)]; > + struct cmsghdr *cmsg; > + > + memset(&msgh, 0, sizeof(msgh)); > + memset(control, 0, sizeof(control)); > + > + /* set the payload */ > + iov[0].iov_base = (void *) msg; Don't put space after ). > + iov[0].iov_len = sizeof(VhostUserMsg); > + > + msgh.msg_iov = iov; > + msgh.msg_iovlen = 1; > + > + if (fd_num) { > + msgh.msg_control = control; > + msgh.msg_controllen = sizeof(control); > + > + cmsg = CMSG_FIRSTHDR(&msgh); > + > + cmsg->cmsg_len = CMSG_LEN(fd_size); > + cmsg->cmsg_level = SOL_SOCKET; > + cmsg->cmsg_type = SCM_RIGHTS; > + memcpy(CMSG_DATA(cmsg), fds, fd_size); > + } else { > + msgh.msg_control = 0; > + msgh.msg_controllen = 0; > + } > + > + do { > + r = sendmsg(fd, &msgh, 0); > + } while (r < 0 && errno == EINTR); Won't this block, making guest unavailable, if server is slow in consuming our messages? > + > + if (r < 0) { > + fprintf(stderr, "Failed to send msg(%d), reason: %s\n", > + msg->request, strerror(errno)); Don't use fprintf for error messages, they might not be seen by management. > + } else { > + r = 0; > + } > + > + return r; > +} > > static int vhost_user_call(struct vhost_dev *dev, unsigned long int request, > void *arg) > { > + int fd = dev->control; > + VhostUserMsg msg; > + int result = 0, need_reply = 0; > + int fds[VHOST_MEMORY_MAX_NREGIONS]; > + size_t fd_num = 0; > + > assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); > - fprintf(stderr, "vhost_user_call not implemented\n"); > > - return -1; > + switch (request) { > + default: > + fprintf(stderr, "vhost-user trying to send unhandled ioctl\n"); > + return -1; > + break; > + } > + > + result = vhost_user_send_fds(fd, &msg, fds, fd_num); I don't get it. So msg is sent without being initialized? > + > + if (!result && need_reply) { > + result = vhost_user_recv(fd, &msg); > + if (!result) { > + switch (request) { > + default: > + break; > + } What does this mean? > + } > + } > + > + return result; > } > > static int vhost_user_init(struct vhost_dev *dev, const char *devpath) > { > + int fd = -1; > + struct sockaddr_un un; > + size_t len; > + > assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); > - fprintf(stderr, "vhost_user_init not implemented\n"); > > - return -1; > + /* Create the socket */ > + fd = socket(AF_UNIX, SOCK_STREAM, 0); > + if (fd == -1) { > + perror("socket"); > + return -1; > + } > + > + un.sun_family = AF_UNIX; > + strcpy(un.sun_path, devpath); > + > + len = sizeof(un.sun_family) + strlen(devpath); > + > + /* Connect */ > + if (connect(fd, (struct sockaddr *) &un, len) == -1) { > + perror("connect"); > + return -1; > + } > + Just connect and start using then? This protocol does not look well thought out. What if you want to extend some messages in the future? Change it in some ways? > + dev->control = fd; > + > + return fd; > } > > static int vhost_user_cleanup(struct vhost_dev *dev) > { > assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); > - fprintf(stderr, "vhost_user_cleanup not implemented\n"); > > - return -1; > + return close(dev->control); > } > > static const VhostOps user_ops = { > -- > 1.8.3.2