On 10/29/2024 1:01 AM, Naman Jain wrote:
> Channel offers are requested during VMBus initialization and resume
> from hibernation. Add support to wait for all channel offers to be
> delivered and processed before returning from vmbus_request_offers.
> 
> This is in analogy to a PCI bus not returning from probe until it has
> scanned all devices on the bus.
> 
> Without this, user mode can race with VMBus initialization and miss
> channel offers. User mode has no way to work around this other than
> sleeping for a while, since there is no way to know when VMBus has
> finished processing offers.
> 
> With this added functionality, remove earlier logic which keeps track
> of count of offered channels post resume from hibernation. Once all
> offers delivered message is received, no further offers are going to
> be received. Consequently, logic to prevent suspend from happening
> after previous resume had missing offers, is also removed.
> 
> Co-developed-by: John Starks <josta...@microsoft.com>
> Signed-off-by: John Starks <josta...@microsoft.com>
> Signed-off-by: Naman Jain <namj...@linux.microsoft.com>
> Reviewed-by: Easwar Hariharan <eahar...@linux.microsoft.com>
> ---
> Changes since v1:
> https://lore.kernel.org/all/20241018115811.5530-1-namj...@linux.microsoft.com
> * Added Easwar's Reviewed-By tag
> * Addressed Michael's comments:
>   * Added explanation of all offers delivered message in comments
>   * Removed infinite wait for offers logic, and changed it wait once.
>   * Removed sub channel workqueue flush logic
>   * Added comments on why MLX device offer is not expected as part of
>     this essential boot offer list. I refrained from adding too many          
>                                               details on it as it felt like 
> it is beyond the scope of this patch                                          
>             series and may not be relevant to this. However, please let me 
> know if
>     something needs to be added.
> * Addressed Saurabh's comments:
>   * Changed timeout value to 10000 ms instead of 10*10000
>   * Changed commit msg as per suggestions
>   * Added a comment for warning case of wait_for_completion timeout
> ---
>  drivers/hv/channel_mgmt.c | 55 ++++++++++++++++++++++++++++-----------
>  drivers/hv/connection.c   |  4 +--
>  drivers/hv/hyperv_vmbus.h | 14 +++-------
>  drivers/hv/vmbus_drv.c    | 16 ------------
>  4 files changed, 45 insertions(+), 44 deletions(-)
> 
> diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
> index 3c6011a48dab..a2e9ebe5bf72 100644
> --- a/drivers/hv/channel_mgmt.c
> +++ b/drivers/hv/channel_mgmt.c
> @@ -944,16 +944,6 @@ void vmbus_initiate_unload(bool crash)
>               vmbus_wait_for_unload();
>  }
>  
> -static void check_ready_for_resume_event(void)
> -{
> -     /*
> -      * If all the old primary channels have been fixed up, then it's safe
> -      * to resume.
> -      */
> -     if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
> -             complete(&vmbus_connection.ready_for_resume_event);
> -}
> -
>  static void vmbus_setup_channel_state(struct vmbus_channel *channel,
>                                     struct vmbus_channel_offer_channel *offer)
>  {
> @@ -1109,8 +1099,6 @@ static void vmbus_onoffer(struct 
> vmbus_channel_message_header *hdr)
>  
>               /* Add the channel back to the array of channels. */
>               vmbus_channel_map_relid(oldchannel);
> -             check_ready_for_resume_event();
> -
>               mutex_unlock(&vmbus_connection.channel_mutex);
>               return;
>       }
> @@ -1296,13 +1284,22 @@ EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
>  
>  /*
>   * vmbus_onoffers_delivered -
> - * This is invoked when all offers have been delivered.
> + * CHANNELMSG_ALLOFFERS_DELIVERED message arrives after all the essential
> + * boot-time offers are delivered. Other channels can be hot added
> + * or removed later, even immediately after the all-offers-delivered
> + * message. A boot-time offer will be any of the virtual hardware the
> + * VM is configured with at boot.
>   *
> - * Nothing to do here.
> + * Virtual devices like Mellanox NIC may not be included in the list of
> + * these initial boot offers because it is an optional accelerator to
> + * the synthetic VMBus NIC. It is hot added only after the VMBus NIC
> + * channel is opened (once it knows the guest can support it, via the
> + * sriov bit in the netvsc protocol).
>   */
>  static void vmbus_onoffers_delivered(
>                       struct vmbus_channel_message_header *hdr)
>  {
> +     complete(&vmbus_connection.all_offers_delivered_event);
>  }
>  
>  /*
> @@ -1578,7 +1575,8 @@ void vmbus_onmessage(struct 
> vmbus_channel_message_header *hdr)
>  }
>  
>  /*
> - * vmbus_request_offers - Send a request to get all our pending offers.
> + * vmbus_request_offers - Send a request to get all our pending offers
> + * and wait for all offers to arrive.
>   */
>  int vmbus_request_offers(void)
>  {
> @@ -1596,6 +1594,10 @@ int vmbus_request_offers(void)
>  
>       msg->msgtype = CHANNELMSG_REQUESTOFFERS;
>  
> +     /*
> +      * This REQUESTOFFERS message will result in the host sending an all
> +      * offers delivered message.
> +      */
>       ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
>                            true);
>  
> @@ -1607,6 +1609,29 @@ int vmbus_request_offers(void)
>               goto cleanup;
>       }
>  
> +     /*
> +      * Wait for the host to send all offers.
> +      * Keeping it as a best-effort mechanism, where a warning is
> +      * printed if a timeout occurs, and execution is resumed.
> +      */
> +     if (!wait_for_completion_timeout(
> +             &vmbus_connection.all_offers_delivered_event, 
> msecs_to_jiffies(10000))) {
> +             pr_warn("timed out waiting for all offers to be 
> delivered...\n");
> +     }

secs_to_jiffies() has been merged [1] so please update this to a call to
secs_to_jiffies(10). A Cocinelle script will probably take care of it
sooner or later in any case.

[1]
https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/commit/?id=b35108a51cf7bab58d7eace1267d7965978bcdb8

> +
> +     /*
> +      * Flush handling of offer messages (which may initiate work on
> +      * other work queues).
> +      */
> +     flush_workqueue(vmbus_connection.work_queue);
> +
> +     /*
> +      * Flush workqueues for processing the incoming offers. Subchannel
> +      * offers and processing can happen later, so there is no need to
> +      * flush those workqueues here.
> +      */
> +     flush_workqueue(vmbus_connection.handle_primary_chan_wq);
> +
>  cleanup:
>       kfree(msginfo);
>  
> diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
> index f001ae880e1d..8351360bba16 100644
> --- a/drivers/hv/connection.c
> +++ b/drivers/hv/connection.c
> @@ -34,8 +34,8 @@ struct vmbus_connection vmbus_connection = {
>  
>       .ready_for_suspend_event = COMPLETION_INITIALIZER(
>                                 vmbus_connection.ready_for_suspend_event),
> -     .ready_for_resume_event = COMPLETION_INITIALIZER(
> -                               vmbus_connection.ready_for_resume_event),
> +     .all_offers_delivered_event = COMPLETION_INITIALIZER(
> +                               vmbus_connection.all_offers_delivered_event),
>  };
>  EXPORT_SYMBOL_GPL(vmbus_connection);
>  
> diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
> index d2856023d53c..80cc65dac740 100644
> --- a/drivers/hv/hyperv_vmbus.h
> +++ b/drivers/hv/hyperv_vmbus.h
> @@ -287,18 +287,10 @@ struct vmbus_connection {
>       struct completion ready_for_suspend_event;
>  
>       /*
> -      * The number of primary channels that should be "fixed up"
> -      * upon resume: these channels are re-offered upon resume, and some
> -      * fields of the channel offers (i.e. child_relid and connection_id)
> -      * can change, so the old offermsg must be fixed up, before the resume
> -      * callbacks of the VSC drivers start to further touch the channels.
> +      * Completed once the host has offered all channels. Note that
> +      * some channels may still be being process on a work queue.
>        */
> -     atomic_t nr_chan_fixup_on_resume;
> -     /*
> -      * vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to
> -      * drop to zero.
> -      */
> -     struct completion ready_for_resume_event;
> +     struct completion all_offers_delivered_event;
>  };
>  
>  
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index 9b15f7daf505..bd3fc41dc06b 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -2427,11 +2427,6 @@ static int vmbus_bus_suspend(struct device *dev)
>       if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
>               wait_for_completion(&vmbus_connection.ready_for_suspend_event);
>  
> -     if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) {
> -             pr_err("Can not suspend due to a previous failed resuming\n");
> -             return -EBUSY;
> -     }
> -
>       mutex_lock(&vmbus_connection.channel_mutex);
>  
>       list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
> @@ -2456,17 +2451,12 @@ static int vmbus_bus_suspend(struct device *dev)
>                       pr_err("Sub-channel not deleted!\n");
>                       WARN_ON_ONCE(1);
>               }
> -
> -             atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
>       }
>  
>       mutex_unlock(&vmbus_connection.channel_mutex);
>  
>       vmbus_initiate_unload(false);
>  
> -     /* Reset the event for the next resume. */
> -     reinit_completion(&vmbus_connection.ready_for_resume_event);
> -
>       return 0;
>  }
>  
> @@ -2502,14 +2492,8 @@ static int vmbus_bus_resume(struct device *dev)
>       if (ret != 0)
>               return ret;
>  
> -     WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
> -
>       vmbus_request_offers();
>  
> -     if (wait_for_completion_timeout(
> -             &vmbus_connection.ready_for_resume_event, 10 * HZ) == 0)
> -             pr_err("Some vmbus device is missing after suspending?\n");
> -
>       /* Reset the event for the next suspend. */
>       reinit_completion(&vmbus_connection.ready_for_suspend_event);
>  


Reply via email to