On 7/1/20 9:39 PM, Laurent Vivier wrote: > qemu_set_nonblock() checks that the file descriptor can be used and, if > not, crashes QEMU. An assert() is used for that. The use of assert() is > used to detect programming error and the coredump will allow to debug > the problem. > > But in the case of the tap device, this assert() can be triggered by > a misconfiguration by the user. At startup, it's not a real problem, but it > can also happen during the hot-plug of a new device, and here it's a > problem because we can crash a perfectly healthy system. > > For instance: > # ip link add link virbr0 name macvtap0 type macvtap mode bridge > # ip link set macvtap0 up > # TAP=/dev/tap$(ip -o link show macvtap0 | cut -d: -f1) > # qemu-system-x86_64 -machine q35 -device pcie-root-port,id=pcie-root-port-0 > -monitor stdio 9<> $TAP > (qemu) netdev_add type=tap,id=hostnet0,vhost=on,fd=9 > (qemu) device_add > driver=virtio-net-pci,netdev=hostnet0,id=net0,bus=pcie-root-port-0 > (qemu) device_del net0 > (qemu) netdev_del hostnet0 > (qemu) netdev_add type=tap,id=hostnet1,vhost=on,fd=9 > qemu-system-x86_64: .../util/oslib-posix.c:247: qemu_set_nonblock: Assertion > `f != -1' failed. > Aborted (core dumped) > > To avoid that, add a function, qemu_try_set_nonblock(), that allows to report > the > problem without crashing. > > Signed-off-by: Laurent Vivier <lviv...@redhat.com> > --- > include/qemu/sockets.h | 1 + > net/tap.c | 16 +++++++++--- > util/oslib-posix.c | 26 +++++++++++++------ > util/oslib-win32.c | 57 ++++++++++++++++++++++++------------------ > 4 files changed, 64 insertions(+), 36 deletions(-) > > diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h > index 57cd049d6edd..7d1f8135767d 100644 > --- a/include/qemu/sockets.h > +++ b/include/qemu/sockets.h > @@ -18,6 +18,7 @@ int qemu_accept(int s, struct sockaddr *addr, socklen_t > *addrlen); > int socket_set_cork(int fd, int v); > int socket_set_nodelay(int fd); > void qemu_set_block(int fd); > +int qemu_try_set_nonblock(int fd); > void qemu_set_nonblock(int fd); > int socket_set_fast_reuse(int fd); > > diff --git a/net/tap.c b/net/tap.c > index 6207f61f84ab..fb04c9044ce2 100644 > --- a/net/tap.c > +++ b/net/tap.c > @@ -766,6 +766,7 @@ int net_init_tap(const Netdev *netdev, const char *name, > Error *err = NULL; > const char *vhostfdname; > char ifname[128]; > + int ret = 0;
No need to zero-initialize. Otherwise: Reviewed-by: Philippe Mathieu-Daudé <phi...@redhat.com> > > assert(netdev->type == NET_CLIENT_DRIVER_TAP); > tap = &netdev->u.tap; > @@ -795,7 +796,12 @@ int net_init_tap(const Netdev *netdev, const char *name, > return -1; > } > > - qemu_set_nonblock(fd); > + ret = qemu_try_set_nonblock(fd); > + if (ret < 0) { > + error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", > + name, fd); > + return -1; > + } > > vnet_hdr = tap_probe_vnet_hdr(fd); > > @@ -810,7 +816,6 @@ int net_init_tap(const Netdev *netdev, const char *name, > char **fds; > char **vhost_fds; > int nfds = 0, nvhosts = 0; > - int ret = 0; > > if (tap->has_ifname || tap->has_script || tap->has_downscript || > tap->has_vnet_hdr || tap->has_helper || tap->has_queues || > @@ -843,7 +848,12 @@ int net_init_tap(const Netdev *netdev, const char *name, > goto free_fail; > } > > - qemu_set_nonblock(fd); > + ret = qemu_try_set_nonblock(fd); > + if (ret < 0) { > + error_setg_errno(errp, -ret, "%s: Can't use file descriptor > %d", > + name, fd); > + goto free_fail; > + } > > if (i == 0) { > vnet_hdr = tap_probe_vnet_hdr(fd); > diff --git a/util/oslib-posix.c b/util/oslib-posix.c > index 916f1be2243a..149254cd691f 100644 > --- a/util/oslib-posix.c > +++ b/util/oslib-posix.c > @@ -253,25 +253,35 @@ void qemu_set_block(int fd) > assert(f != -1); > } > > -void qemu_set_nonblock(int fd) > +int qemu_try_set_nonblock(int fd) > { > int f; > f = fcntl(fd, F_GETFL); > - assert(f != -1); > - f = fcntl(fd, F_SETFL, f | O_NONBLOCK); > -#ifdef __OpenBSD__ > if (f == -1) { > + return -errno; > + } > + if (fcntl(fd, F_SETFL, f | O_NONBLOCK) == -1) { > +#ifdef __OpenBSD__ > /* > * Previous to OpenBSD 6.3, fcntl(F_SETFL) is not permitted on > * memory devices and sets errno to ENODEV. > * It's OK if we fail to set O_NONBLOCK on devices like /dev/null, > * because they will never block anyway. > */ > - assert(errno == ENODEV); > - } > -#else > - assert(f != -1); > + if (errno == ENODEV) { > + return 0; > + } > #endif > + return -errno; > + } > + return 0; > +} > + > +void qemu_set_nonblock(int fd) > +{ > + int f; > + f = qemu_try_set_nonblock(fd); > + assert(f == 0); > } > > int socket_set_fast_reuse(int fd) > diff --git a/util/oslib-win32.c b/util/oslib-win32.c > index e9b14ab17847..5548ce6038f3 100644 > --- a/util/oslib-win32.c > +++ b/util/oslib-win32.c > @@ -132,31 +132,6 @@ struct tm *localtime_r(const time_t *timep, struct tm > *result) > } > #endif /* CONFIG_LOCALTIME_R */ > > -void qemu_set_block(int fd) > -{ > - unsigned long opt = 0; > - WSAEventSelect(fd, NULL, 0); > - ioctlsocket(fd, FIONBIO, &opt); > -} > - > -void qemu_set_nonblock(int fd) > -{ > - unsigned long opt = 1; > - ioctlsocket(fd, FIONBIO, &opt); > - qemu_fd_register(fd); > -} > - > -int socket_set_fast_reuse(int fd) > -{ > - /* Enabling the reuse of an endpoint that was used by a socket still in > - * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On > Windows > - * fast reuse is the default and SO_REUSEADDR does strange things. So we > - * don't have to do anything here. More info can be found at: > - * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx > */ > - return 0; > -} > - > - > static int socket_error(void) > { > switch (WSAGetLastError()) { > @@ -233,6 +208,38 @@ static int socket_error(void) > } > } > > +void qemu_set_block(int fd) > +{ > + unsigned long opt = 0; > + WSAEventSelect(fd, NULL, 0); > + ioctlsocket(fd, FIONBIO, &opt); > +} > + > +int qemu_try_set_nonblock(int fd) > +{ > + unsigned long opt = 1; > + if (ioctlsocket(fd, FIONBIO, &opt) != NO_ERROR) { > + return -socket_error(); > + } > + qemu_fd_register(fd); > + return 0; > +} > + > +void qemu_set_nonblock(int fd) > +{ > + (void)qemu_try_set_nonblock(fd); > +} > + > +int socket_set_fast_reuse(int fd) > +{ > + /* Enabling the reuse of an endpoint that was used by a socket still in > + * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On > Windows > + * fast reuse is the default and SO_REUSEADDR does strange things. So we > + * don't have to do anything here. More info can be found at: > + * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx > */ > + return 0; > +} > + > int inet_aton(const char *cp, struct in_addr *ia) > { > uint32_t addr = inet_addr(cp); >