Hi

On Wed, Jun 9, 2021 at 4:21 AM Longpeng (Mike, Cloud Infrastructure Service
Product Dept.) <longpe...@huawei.com> wrote:

>
>
> On 2021/6/8 23:37, Daniel P. Berrangé wrote:
> > On Tue, Jun 08, 2021 at 04:07:30PM +0200, Markus Armbruster wrote:
> >> "Longpeng (Mike, Cloud Infrastructure Service Product Dept.)"
> >> <longpe...@huawei.com> writes:
> >>
> >>> We found a race during QEMU startup which can cause the QEMU process
> >>> to coredump.
> >>>
> >>> <main loop>                             |    <MON iothread>
> >>>                                         |
> >>> [1] create MON chardev                  |
> >>> qemu_create_early_backends              |
> >>>   chardev_init_func                     |
> >>>                                         |
> >>> [2] create MON iothread                 |
> >>> qemu_create_late_backends               |
> >>>   mon_init_func                         |
> >>>     aio_bh_schedule---------------------> monitor_qmp_setup_handlers_bh
> >>> [3] enter main loop                     |    tcp_chr_update_read_handler
> >>> (* A client comes in, e.g. Libvirt *)   |      update_ioc_handlers
> >>> tcp_chr_new_client                      |
> >>>   update_ioc_handlers                   |
> >>>                                         |
> >>>     [4] create new hup_source           |
> >>>         s->hup_source = *PTR1*          |
> >>>           g_source_attach(s->hup_source)|
> >>>                                         |        [5] remove_hup_source(*PTR1*)
> >>>                                         |            (create new hup_source)
> >>>                                         |             s->hup_source = *PTR2*
> >>>         [6] g_source_attach_unlocked    |
> >>>               *PTR1* is freed by [5]    |
> >>>
> >>> Do you have any suggestions to fix this bug? Thanks!
> >>
> >> Do we?  We talked, but I'm not sure we reached a conclusion.
> >
> > Seems like we ended up with two options.
> >
> >   1. A workaround for the current specific problem by rearranging
> >      the initialization code in the monitor a little.
> >
> >   2. A design fix of splitting the chardev creation into two
> >      parts, one creation, and one activation.
> >
> > The latter is significantly more work, but is a better long-term bet
> > IMHO. But what we really need is someone motivated to actually
> > implement one of the two options.
> >
>
> How about the following implementation of option 1? We've tested it for
> several weeks and it works fine.
>
> diff --git a/chardev/char-socket.c b/chardev/char-socket.c
>

If we go with this option, disable_handler will need to be implemented for
all chardevs that install handlers (or we could use a
QEMU_CHAR_FEATURE_DISABLE_HANDLER flag, which would limit the chardevs that
can be associated with a monitor and knowingly break previously working
setups).
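
Roughly, the flag variant could reuse the existing
qemu_chr_set_feature()/qemu_chr_has_feature() helpers. A minimal sketch,
where QEMU_CHAR_FEATURE_DISABLE_HANDLER is a hypothetical new feature bit
(everything else is existing API):

    /* In the init path of each chardev that implements
     * chr_disable_handler, advertise the capability
     * (QEMU_CHAR_FEATURE_DISABLE_HANDLER is hypothetical): */
    qemu_chr_set_feature(chr, QEMU_CHAR_FEATURE_DISABLE_HANDLER);

    /* In monitor_init_qmp(), refuse chardevs that cannot safely hand
     * their handlers over to the monitor iothread: */
    if (!qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_DISABLE_HANDLER)) {
        error_report("QMP monitor: chardev does not support handler "
                     "removal");
        exit(1);
    }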


> index a484641..ecb3db9 100644
> --- a/chardev/char-socket.c
> +++ b/chardev/char-socket.c
> @@ -722,6 +722,19 @@ static void tcp_chr_update_read_handler(Chardev *chr)
>      update_ioc_handlers(s);
>  }
>
> +static void tcp_chr_disable_handler(Chardev *chr)
> +{
> +    SocketChardev *s = SOCKET_CHARDEV(chr);
> +
> +    if (s->listener && s->state == TCP_CHARDEV_STATE_DISCONNECTED) {
> +        qio_net_listener_set_client_func_full(s->listener, NULL, NULL,
> +                                              NULL, chr->gcontext);
> +    }
> +
> +    remove_fd_in_watch(chr);
> +    remove_hup_source(s);
> +}
> +
>  static bool tcp_chr_is_connected(Chardev *chr)
>  {
>      SocketChardev *s = SOCKET_CHARDEV(chr);
> @@ -1703,6 +1716,7 @@ static void char_socket_class_init(ObjectClass *oc, void *data)
>      cc->chr_add_watch = tcp_chr_add_watch;
>      cc->chr_set_reconnect_time = tcp_chr_set_reconnect_time;
>      cc->chr_update_read_handler = tcp_chr_update_read_handler;
> +    cc->chr_disable_handler = tcp_chr_disable_handler;
>      cc->chr_is_connected = tcp_chr_is_connected;
>      cc->chr_get_connect_id = tcp_chr_get_connect_id;
>
> diff --git a/chardev/char.c b/chardev/char.c
> index ff0a3cf..990fe4f 100644
> --- a/chardev/char.c
> +++ b/chardev/char.c
> @@ -238,6 +238,15 @@ void qemu_chr_be_update_read_handlers(Chardev *s,
>      }
>  }
>
> +void qemu_chr_be_disable_handlers(Chardev *s)
> +{
> +    ChardevClass *cc = CHARDEV_GET_CLASS(s);
> +
> +    if (cc->chr_disable_handler) {
> +        cc->chr_disable_handler(s);
> +    }
> +}
> +
>  int qemu_chr_add_client(Chardev *s, int fd)
>  {
>      return CHARDEV_GET_CLASS(s)->chr_add_client ?
> diff --git a/include/chardev/char.h b/include/chardev/char.h
> index d1ec628..7a8c740 100644
> --- a/include/chardev/char.h
> +++ b/include/chardev/char.h
> @@ -212,6 +212,8 @@ void qemu_chr_be_write_impl(Chardev *s, uint8_t *buf, int len);
>  void qemu_chr_be_update_read_handlers(Chardev *s,
>                                        GMainContext *context);
>
> +void qemu_chr_be_disable_handlers(Chardev *s);
> +
>  /**
>   * qemu_chr_be_event:
>   * @event: the event to send
> @@ -282,6 +284,7 @@ typedef struct ChardevClass {
>      int (*chr_sync_read)(Chardev *s, const uint8_t *buf, int len);
>      GSource *(*chr_add_watch)(Chardev *s, GIOCondition cond);
>      void (*chr_update_read_handler)(Chardev *s);
> +    void (*chr_disable_handler)(Chardev *s);
>      int (*chr_ioctl)(Chardev *s, int cmd, void *arg);
>      int (*get_msgfds)(Chardev *s, int* fds, int num);
>      int (*set_msgfds)(Chardev *s, int *fds, int num);
> diff --git a/monitor/qmp.c b/monitor/qmp.c
> index 9a69ae4..2c2248c 100644
> --- a/monitor/qmp.c
> +++ b/monitor/qmp.c
> @@ -413,11 +413,13 @@ void monitor_init_qmp(Chardev *chr, bool pretty)
>           * e.g. the chardev is in client mode, with wait=on.
>           */
>          remove_fd_in_watch(chr);
> +
>          /*
> -         * We can't call qemu_chr_fe_set_handlers() directly here
> -         * since chardev might be running in the monitor I/O
> -         * thread.  Schedule a bottom half.
> +         * Before scheduling a bottom half, we should clean up the
> +         * handlers in the default context to prevent a race between
> +         * the main thread and the iothread.
>           */
> +        qemu_chr_be_disable_handlers(chr);
> +
>          aio_bh_schedule_oneshot(iothread_get_aio_context(mon_iothread),
>                                  monitor_qmp_setup_handlers_bh, mon);
>          /* The bottom half will add @mon to @mon_list */
> --
> 1.8.3.1
>
>
> > Regards,
> > Daniel
> >
>
>
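
For reference, the design fix (option 2) would split chardev creation into
a create step and an activate step, so handlers are only ever installed in
their final context and the handover window disappears entirely. A rough
sketch, assuming hypothetical qemu_chr_new_inactive() and
qemu_chr_activate() entry points (no such API exists today;
iothread_get_g_main_context() is existing API):

    /* Phase 1: allocate and configure the chardev, without installing
     * any I/O watches in the default main context. */
    Chardev *chr = qemu_chr_new_inactive("mon0", "socket,...", NULL);

    /* Phase 2: once the frontend has picked its context (here the
     * monitor iothread), attach all I/O sources there, in one place. */
    qemu_chr_activate(chr, iothread_get_g_main_context(mon_iothread));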

-- 
Marc-André Lureau
