On Wed, 22 Apr 2020 08:40:40 +0000 "Zhang, Chen" <chen.zh...@intel.com> wrote:
> > -----Original Message----- > > From: Lukas Straub <lukasstra...@web.de> > > Sent: Thursday, April 9, 2020 2:34 AM > > To: qemu-devel <qemu-devel@nongnu.org> > > Cc: Zhang, Chen <chen.zh...@intel.com>; Li Zhijian > > <lizhij...@cn.fujitsu.com>; Jason Wang <jasow...@redhat.com>; Marc- > > André Lureau <marcandre.lur...@redhat.com>; Paolo Bonzini > > <pbonz...@redhat.com> > > Subject: [PATCH 3/3] net/colo-compare.c: Fix deadlock > > > > The chr_out chardev is connected to a filter-redirector running in the main > > loop. qemu_chr_fe_write_all might block here in compare_chr_send if the > > (socket-)buffer is full. > > If another filter-redirector in the main loop want's to send data to > > chr_pri_in > > it might also block if the buffer is full. This leads to a deadlock because > > both > > event loops get blocked. > > > > Fix this by converting compare_chr_send to a coroutine and return error if > > it > > is in use. > > > > I have tested this series, running fine currently. > Can you share performance data after this patch? > > Thanks > Zhang Chen Hello, Here are the results (using iperf3): Client-to-server tcp: without patch: ~64.2 Mbit/s with patch: ~28.9 Mbit/s Server-to-client tcp: without patch: 360 Kbit/s (when it doesn't deadlock :) with patch: 220 Kbit/s Yeah, it hurts performance somewhat, but the deadlock happens often with lots of server-to-client traffic. (It deadlocked in 2 of 4 runs) Do you have a better idea to solve this issue? Regards, Lukas Straub > > Signed-off-by: Lukas Straub <lukasstra...@web.de> > > --- > > net/colo-compare.c | 82 > > +++++++++++++++++++++++++++++++++++++++------- > > 1 file changed, 71 insertions(+), 11 deletions(-) > > > > diff --git a/net/colo-compare.c b/net/colo-compare.c index > > 1de4220fe2..82787d3055 100644 > > --- a/net/colo-compare.c > > +++ b/net/colo-compare.c > > @@ -32,6 +32,9 @@ > > #include "migration/migration.h" > > #include "util.h" > > > > +#include "block/aio-wait.h" > > +#include "qemu/coroutine.h" > > + > > #define TYPE_COLO_COMPARE "colo-compare" > > #define COLO_COMPARE(obj) \ > > OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE) @@ -77,6 > > +80,17 @@ static int event_unhandled_count; > > * |packet | |packet + |packet | |packet + > > * +--------+ +--------+ +--------+ +--------+ > > */ > > + > > +typedef struct SendCo { > > + Coroutine *co; > > + uint8_t *buf; > > + uint32_t size; > > + uint32_t vnet_hdr_len; > > + bool notify_remote_frame; > > + bool done; > > + int ret; > > +} SendCo; > > + > > typedef struct CompareState { > > Object parent; > > > > @@ -91,6 +105,7 @@ typedef struct CompareState { > > SocketReadState pri_rs; > > SocketReadState sec_rs; > > SocketReadState notify_rs; > > + SendCo sendco; > > bool vnet_hdr; > > uint32_t compare_timeout; > > uint32_t expired_scan_cycle; > > @@ -699,19 +714,17 @@ static void colo_compare_connection(void > > *opaque, void *user_data) > > } > > } > > > > -static int compare_chr_send(CompareState *s, > > - const uint8_t *buf, > > - uint32_t size, > > - uint32_t vnet_hdr_len, > > - bool notify_remote_frame) > > +static void coroutine_fn _compare_chr_send(void *opaque) > > { > > + CompareState *s = opaque; > > + SendCo *sendco = &s->sendco; > > + const uint8_t *buf = sendco->buf; > > + uint32_t size = sendco->size; > > + uint32_t vnet_hdr_len = sendco->vnet_hdr_len; > > + bool notify_remote_frame = sendco->notify_remote_frame; > > int ret = 0; > > uint32_t len = htonl(size); > > > > - if (!size) { > > - return 0; > > - } > > - > > if (notify_remote_frame) { > > ret = qemu_chr_fe_write_all(&s->chr_notify_dev, > > (uint8_t *)&len, @@ -754,10 +767,50 @@ > > static int > > compare_chr_send(CompareState *s, > > goto err; > > } > > > > - return 0; > > + sendco->ret = 0; > > + goto out; > > > > err: > > - return ret < 0 ? ret : -EIO; > > + sendco->ret = ret < 0 ? ret : -EIO; > > +out: > > + sendco->co = NULL; > > + g_free(sendco->buf); > > + sendco->buf = NULL; > > + sendco->done = true; > > + aio_wait_kick(); > > +} > > + > > +static int compare_chr_send(CompareState *s, > > + const uint8_t *buf, > > + uint32_t size, > > + uint32_t vnet_hdr_len, > > + bool notify_remote_frame) { > > + SendCo *sendco = &s->sendco; > > + > > + if (!size) { > > + return 0; > > + } > > + > > + if (sendco->done) { > > + sendco->co = qemu_coroutine_create(_compare_chr_send, s); > > + sendco->buf = g_malloc(size); > > + sendco->size = size; > > + sendco->vnet_hdr_len = vnet_hdr_len; > > + sendco->notify_remote_frame = notify_remote_frame; > > + sendco->done = false; > > + memcpy(sendco->buf, buf, size); > > + qemu_coroutine_enter(sendco->co); > > + if (sendco->done) { > > + /* report early errors */ > > + return sendco->ret; > > + } else { > > + /* else assume success */ > > + return 0; > > + } > > + } > > + > > + return -ENOBUFS; > > } > > > > static int compare_chr_can_read(void *opaque) @@ -1146,6 +1199,8 @@ > > static void colo_compare_complete(UserCreatable *uc, Error **errp) > > CompareState *s = COLO_COMPARE(uc); > > Chardev *chr; > > > > + s->sendco.done = true; > > + > > if (!s->pri_indev || !s->sec_indev || !s->outdev || !s->iothread) { > > error_setg(errp, "colo compare needs 'primary_in' ," > > "'secondary_in','outdev','iothread' property set"); @@ > > -1281,6 > > +1336,11 @@ static void colo_compare_finalize(Object *obj) > > CompareState *s = COLO_COMPARE(obj); > > CompareState *tmp = NULL; > > > > + AioContext *ctx = iothread_get_aio_context(s->iothread); > > + aio_context_acquire(ctx); > > + AIO_WAIT_WHILE(ctx, !s->sendco.done); > > + aio_context_release(ctx); > > + > > qemu_chr_fe_deinit(&s->chr_pri_in, false); > > qemu_chr_fe_deinit(&s->chr_sec_in, false); > > qemu_chr_fe_deinit(&s->chr_out, false); > > -- > > 2.20.1
pgpsfOh1iB7ig.pgp
Description: OpenPGP digital signature