Solène Rapenne <sol...@perso.pw> writes:

> On Fri, 2023-06-09 at 11:25 -0400, Dave Voutila wrote:
>>
>> Thanks. This feels like bad fd accounting during the fork/exec dance.
>>
>> Sounds like the switch definition and usage isn't required for
>> reproducing?
>
> indeed, you don't need it, a local interface is enough

Can you try this diff? I think I found the issue. In short, a file
descriptor for one of the devices was being closed, that descriptor was
recycled when creating a socketpair, and then being closed again because
of the loop over ther other device fds.

Nice part about this possible fix is it's a net-negative diff.

Sorry it took a month+ to find time to look a this.

diff refs/heads/master refs/heads/vmd-qcow
commit - 4d951e9375c9e68d1aed559bb61502fa0cca5b7a
commit + 72c6b89f61e48347d7c68dfa0cfa2d2b621ecb68
blob - 6167a7764cb2d6e05233260679f3146b095bc768
blob + e8f5a1b162a41dc9d40cd665e0cc5ea22bc44e61
--- usr.sbin/vmd/virtio.c
+++ usr.sbin/vmd/virtio.c
@@ -1349,17 +1349,6 @@ virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev
                goto err;
        }

-       /* Keep data file descriptors open after exec. */
-       for (i = 0; i < data_fds_sz; i++) {
-               log_debug("%s: marking fd %d !close-on-exec", __func__,
-                   data_fds[i]);
-               if (fcntl(data_fds[i], F_SETFD, 0)) {
-                       ret = errno;
-                       log_warn("%s: fcntl", __func__);
-                       goto err;
-               }
-       }
-
        /* Fork... */
        dev_pid = fork();
        if (dev_pid == -1) {
@@ -1459,26 +1448,14 @@ virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev
                close_fd(async_fds[0]);
                close_fd(sync_fds[0]);

-               /*
-                * Close any other device fd's we know aren't
-                * ours. This releases any exclusive locks held on
-                * things like disk images.
-                */
-               SLIST_FOREACH(d, &virtio_devs, dev_next) {
-                       if (d == dev)
-                               continue;
-
-                       switch (d->dev_type) {
-                       case VMD_DEVTYPE_DISK:
-                               for (j = 0; j < d->vioblk.ndisk_fd; j++)
-                                       close_fd(d->vioblk.disk_fd[j]);
-                               break;
-                       case VMD_DEVTYPE_NET:
-                               close_fd(d->vionet.data_fd);
-                               break;
-                       default:
-                               fatalx("%s: invalid device type '%c'",
-                                   __func__, d->dev_type);
+               /* Keep data file descriptors open after exec. */
+               for (i = 0; i < data_fds_sz; i++) {
+                       log_debug("%s: marking fd %d !close-on-exec", __func__,
+                           data_fds[i]);
+                       if (fcntl(data_fds[i], F_SETFD, 0)) {
+                               ret = errno;
+                               log_warn("%s: fcntl", __func__);
+                               goto err;
                        }
                }

Reply via email to