On Dienstag, 31. August 2021 19:04:54 CEST Greg Kurz wrote: > > Ok, I made a quick "is same thread" assertion check: > > > > diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c > > index 54ee93b71f..bb6ebd16aa 100644 > > --- a/hw/9pfs/virtio-9p-device.c > > +++ b/hw/9pfs/virtio-9p-device.c > > @@ -28,12 +28,25 @@ > > > > #include "qemu/module.h" > > #include "sysemu/qtest.h" > > > > +static void assert_thread(void) { > > + static QemuThread thread; > > + static bool calledBefore; > > + if (!calledBefore) { > > + calledBefore = 1; > > + qemu_thread_get_self(&thread); > > + return; > > + } > > + assert(qemu_thread_is_self(&thread)); > > +} > > + [...] > > And it triggered, however I am not sure if some of those functions I > > asserted above are indeed allowed to be executed on a different thread > > than main thread: > > > > Program terminated with signal SIGABRT, Aborted. > > #0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 > > 50 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory. > > [Current thread is 1 (Thread 0x7fd0bcef1700 (LWP 6470))] > > Based in the thread number, it seems that the signal was raised by > the main event thread...
No, it was not main thread actually, gdb's "current thread is 1" output is misleading. Following the thread id trace, I extended the thread assertion checks over to v9fs_walk() as well, like this: static void coroutine_fn v9fs_walk(void *opaque) { ... assert_thread(); v9fs_co_run_in_worker({ ... }); assert_thread(); ... } and made sure the reference thread id to be compared is really the main thread. And what happens here is before v9fs_co_run_in_worker() is entered, v9fs_walk() runs on main thread, but after returning from v9fs_co_run_in_worker() it runs on a different thread for some reason, not on main thread as it would be expected at that point. This is my test patch: ---- diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 2815257f42..fa706c10e6 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -1717,6 +1717,8 @@ static void coroutine_fn v9fs_walk(void *opaque) V9fsState *s = pdu->s; V9fsQID qid; + assert_thread(); + err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames); if (err < 0) { pdu_complete(pdu, err); @@ -1724,10 +1726,15 @@ static void coroutine_fn v9fs_walk(void *opaque) } offset += err; + assert_thread(); + trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames); + assert_thread(); + if (nwnames > P9_MAXWELEM) { err = -EINVAL; + assert_thread(); goto out_nofid; } if (nwnames) { @@ -1738,10 +1745,12 @@ static void coroutine_fn v9fs_walk(void *opaque) for (i = 0; i < nwnames; i++) { err = pdu_unmarshal(pdu, offset, "s", &wnames[i]); if (err < 0) { + assert_thread(); goto out_nofid; } if (name_is_illegal(wnames[i].data)) { err = -ENOENT; + assert_thread(); goto out_nofid; } offset += err; @@ -1750,6 +1759,7 @@ static void coroutine_fn v9fs_walk(void *opaque) fidp = get_fid(pdu, fid); if (fidp == NULL) { err = -ENOENT; + assert_thread(); goto out_nofid; } @@ -1767,6 +1777,7 @@ static void coroutine_fn v9fs_walk(void *opaque) * Twalk client request) as small as possible, run all the required fs * driver code altogether inside the following block. */ + assert_thread(); v9fs_co_run_in_worker({ if (v9fs_request_cancelled(pdu)) { err = -EINTR; @@ -1807,6 +1818,7 @@ static void coroutine_fn v9fs_walk(void *opaque) } } }); + assert_thread(); /* * Handle all the rest of this Twalk request on main thread ... */ diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index 54ee93b71f..abf8a33b3b 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -27,6 +27,21 @@ #include "qemu/iov.h" #include "qemu/module.h" #include "sysemu/qtest.h" +#include <sys/types.h> +#include <sys/syscall.h> + +static pid_t main_thread_id = -1; + +void assert_thread(void) { + assert(main_thread_id != -1); + pid_t tid = syscall(__NR_gettid); + if (tid != main_thread_id) { + printf("Unexpected thread id %d (should be %d) - process id is %d\n", + tid, main_thread_id, getpid()); + fflush(stdout); + } + assert(tid == main_thread_id); +} static void virtio_9p_push_and_notify(V9fsPDU *pdu) { @@ -34,6 +49,8 @@ static void virtio_9p_push_and_notify(V9fsPDU *pdu) V9fsVirtioState *v = container_of(s, V9fsVirtioState, state); VirtQueueElement *elem = v->elems[pdu->idx]; + assert_thread(); + /* push onto queue and notify */ virtqueue_push(v->vq, elem, pdu->size); g_free(elem); @@ -51,6 +68,8 @@ static void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq) ssize_t len; VirtQueueElement *elem; + assert_thread(); + while ((pdu = pdu_alloc(s))) { P9MsgHeader out; @@ -125,6 +144,8 @@ static ssize_t virtio_pdu_vmarshal(V9fsPDU *pdu, size_t offset, VirtQueueElement *elem = v->elems[pdu->idx]; ssize_t ret; + assert_thread(); + ret = v9fs_iov_vmarshal(elem->in_sg, elem->in_num, offset, 1, fmt, ap); if (ret < 0) { VirtIODevice *vdev = VIRTIO_DEVICE(v); @@ -143,6 +164,8 @@ static ssize_t virtio_pdu_vunmarshal(V9fsPDU *pdu, size_t offset, VirtQueueElement *elem = v->elems[pdu->idx]; ssize_t ret; + assert_thread(); + ret = v9fs_iov_vunmarshal(elem->out_sg, elem->out_num, offset, 1, fmt, ap); if (ret < 0) { VirtIODevice *vdev = VIRTIO_DEVICE(v); @@ -160,6 +183,8 @@ static void virtio_init_in_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov, VirtQueueElement *elem = v->elems[pdu->idx]; size_t buf_size = iov_size(elem->in_sg, elem->in_num); + assert_thread(); + if (buf_size < size) { VirtIODevice *vdev = VIRTIO_DEVICE(v); @@ -180,6 +205,8 @@ static void virtio_init_out_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov, VirtQueueElement *elem = v->elems[pdu->idx]; size_t buf_size = iov_size(elem->out_sg, elem->out_num); + assert_thread(); + if (buf_size < size) { VirtIODevice *vdev = VIRTIO_DEVICE(v); @@ -251,6 +278,10 @@ static Property virtio_9p_properties[] = { static void virtio_9p_class_init(ObjectClass *klass, void *data) { + main_thread_id = syscall(__NR_gettid); + printf("Main thread ID is %d\n", main_thread_id); + fflush(stdout); + DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); diff --git a/hw/9pfs/virtio-9p.h b/hw/9pfs/virtio-9p.h index 20fa118f3a..eb7d1293cb 100644 --- a/hw/9pfs/virtio-9p.h +++ b/hw/9pfs/virtio-9p.h @@ -17,4 +17,6 @@ struct V9fsVirtioState { #define TYPE_VIRTIO_9P "virtio-9p-device" OBJECT_DECLARE_SIMPLE_TYPE(V9fsVirtioState, VIRTIO_9P) +void assert_thread(void); + #endif ---- Output: Unexpected thread id 10780 (should be 10754) - process id is 10754 qemu-system-x86_64: ../hw/9pfs/virtio-9p-device.c:43: assert_thread: Assertion `tid == main_thread_id' failed. Aborted (core dumped) So the initially captured thread ID 10754 at process startup matches the process ID, hence 10754 was the main thread, and hence the assertion was triggered on a thread that was not main thread. Backtrace: Program terminated with signal SIGABRT, Aborted. #0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 50 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory. [Current thread is 1 (Thread 0x7fb4637fe700 (LWP 10780))] (gdb) bt #0 0x00007fb7ac6d87bb in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/ sysv/linux/raise.c:50 #1 0x00007fb7ac6c3535 in __GI_abort () at abort.c:79 #2 0x00007fb7ac6c340f in __assert_fail_base (fmt=0x7fb7ac825ee0 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x562e6126072f "tid == main_thread_id", file=0x562e612606fc "../hw/2 #3 0x00007fb7ac6d1102 in __GI___assert_fail (assertion=assertion@entry=0x562e6126072f "tid == main_thread_id", file=file@entry=0x562e612606fc "../hw/9pfs/virtio-9p-device.c", line=line@e1 #4 0x0000562e60ef3965 in assert_thread () at ../hw/9pfs/virtio-9p-device.c:43 #5 0x0000562e60ef3965 in assert_thread () at ../hw/9pfs/virtio-9p-device.c:35 #6 0x0000562e60cef125 in v9fs_walk (opaque=0x562e6405c288) at ../hw/9pfs/ 9p.c:1821 #7 0x0000562e6114f79b in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>) at ../util/coroutine-ucontext.c:173 #8 0x00007fb7ac6edb50 in __correctly_grouped_prefixwc (begin=0x2 <error: Cannot access memory at address 0x2>, end=0x0, thousands=0 L'\000', grouping=0x7fb7ac6d87bb <__GI_raise+267> "H\213\214$\b\1 #9 0x0000000000000000 in () And 9p.c:1821 is exactly: assert_thread(); v9fs_co_run_in_worker({ if (v9fs_request_cancelled(pdu)) { err = -EINTR; break; } err = s->ops->lstat(&s->ctx, &dpath, &fidst); if (err < 0) { err = -errno; break; } stbuf = fidst; for (name_idx = 0; name_idx < nwnames; name_idx++) { if (v9fs_request_cancelled(pdu)) { err = -EINTR; break; } if (!same_stat_id(&pdu->s->root_st, &stbuf) || strcmp("..", wnames[name_idx].data)) { err = s->ops->name_to_path(&s->ctx, &dpath, wnames[name_idx].data, &path); if (err < 0) { err = -errno; break; } if (v9fs_request_cancelled(pdu)) { err = -EINTR; break; } err = s->ops->lstat(&s->ctx, &path, &stbuf); if (err < 0) { err = -errno; break; } stbufs[name_idx] = stbuf; v9fs_path_copy(&dpath, &path); v9fs_path_copy(&pathes[name_idx], &path); } } }); assert_thread(); //<--- HERE > > > > [Current thread is 1 (Thread 0x7f3bddd2ac40 (LWP 7811))] > > > > (gdb) bt full > > > > #0 0x000055a934dfb9a7 in virtio_pdu_vunmarshal (pdu=0x55a93717cde8, > > > > offset=7, fmt=0x55a9352766d1 "ddw", ap=0x7f38a9ad9cd0) at > > > > ../hw/9pfs/virtio-9p-device.c: 146 > > > > > > > > s = 0x55a93717b4b8 > > > > v = 0x55a93717aee0 > > > > elem = 0x0 > > > > > > So this is v->elems[pdu->idx]... what's the value of pdu->idx ? > > > > In that originally posted core dump it was 113: > > > > #0 virtio_pdu_vunmarshal (pdu=0x55a93717cde8, offset=7, > > fmt=0x55a9352766d1 > > "ddw", ap=0x7f38a9ad9cd0) at ../hw/9pfs/virtio-9p-device.c:146 > > 146 ret = v9fs_iov_vunmarshal(elem->out_sg, elem->out_num, offset, > > 1, fmt, ap); > > [Current thread is 1 (Thread 0x7f3bddd2ac40 (LWP 7811))] > > (gdb) p pdu->idx > > $1 = 113 > > Ok, so it is a valid index (not over MAX_REQ). So it would mean > that someone cleared the slot in our back ? Yes, the slot is within valid boundaries. Assertion checks for that in virtio-9p-device.c in future wouldn't hurt though. Best regards, Christian Schoenebeck