On Mon, Feb 23, 2026 at 10:50:05AM -0800, Tanmay Shah wrote:
> Current attach on recovery mechanism loads the clean resource table
> during recovery, but doesn't re-allocate the resources. RPMsg
> communication will fail after recovery due to this. Fix this
> incorrect behavior by doing the full detach and attach of remote
> processor during the recovery. This will load the clean resource table
> and re-allocate all the resources, which will set up correct vring
> information in the resource table.
> 
> Signed-off-by: Tanmay Shah <[email protected]>
> ---
> 
> Changes in v3:
>  - both rproc_attach_recovery() and
>    rproc_boot_recovery() are called the same way.
>  - remove unrelated changes
> 
> Changes in v2:
>  - use rproc_boot instead of rproc_attach
>  - move debug message early in the function
> 
>  drivers/remoteproc/remoteproc_core.c | 33 +++++++++++-----------------
>  1 file changed, 13 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/remoteproc/remoteproc_core.c 
> b/drivers/remoteproc/remoteproc_core.c
> index aada2780b343..790ad7c6d12e 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -1777,11 +1777,11 @@ static int rproc_attach_recovery(struct rproc *rproc)
>  {
>       int ret;
>  
> -     ret = __rproc_detach(rproc);
> +     ret = rproc_detach(rproc);
>       if (ret)
>               return ret;
>  
> -     return __rproc_attach(rproc);
> +     return rproc_boot(rproc);
>  }
>  
>  static int rproc_boot_recovery(struct rproc *rproc)
> @@ -1790,10 +1790,14 @@ static int rproc_boot_recovery(struct rproc *rproc)
>       struct device *dev = &rproc->dev;
>       int ret;
>  
> -     ret = rproc_stop(rproc, true);
> +     ret = mutex_lock_interruptible(&rproc->lock);
>       if (ret)
>               return ret;
>  
> +     ret = rproc_stop(rproc, true);
> +     if (ret)
> +             goto unlock_mutex;
> +
>       /* generate coredump */
>       rproc->ops->coredump(rproc);
>  
> @@ -1801,7 +1805,7 @@ static int rproc_boot_recovery(struct rproc *rproc)
>       ret = request_firmware(&firmware_p, rproc->firmware, dev);
>       if (ret < 0) {
>               dev_err(dev, "request_firmware failed: %d\n", ret);
> -             return ret;
> +             goto unlock_mutex;
>       }
>  
>       /* boot the remote processor up again */
> @@ -1809,6 +1813,8 @@ static int rproc_boot_recovery(struct rproc *rproc)
>  
>       release_firmware(firmware_p);
>  
> +unlock_mutex:
> +     mutex_unlock(&rproc->lock);
>       return ret;
>  }
>  
> @@ -1827,26 +1833,13 @@ static int rproc_boot_recovery(struct rproc *rproc)
>  int rproc_trigger_recovery(struct rproc *rproc)
>  {
>       struct device *dev = &rproc->dev;
> -     int ret;
> -
> -     ret = mutex_lock_interruptible(&rproc->lock);
> -     if (ret)
> -             return ret;
> -
> -     /* State could have changed before we got the mutex */
> -     if (rproc->state != RPROC_CRASHED)
> -             goto unlock_mutex;
>  
>       dev_err(dev, "recovering %s\n", rproc->name);
>  
>       if (rproc_has_feature(rproc, RPROC_FEAT_ATTACH_ON_RECOVERY))
> -             ret = rproc_attach_recovery(rproc);
> +             return rproc_attach_recovery(rproc);

rproc_trigger_recovery() can be called either from scheduled work or
directly from the debugfs/sysfs interface, it doesn't seem safe to me to
call rproc_attach_recovery() without ensuring mutual exclusion between
multiple parallel callers.

In fact, I can see the relationship between the commit message and the
changes in rproc_attach_recovery() and rproc_detach(), but I'm not sure
why you need to change rproc_boot_recovery() and
rproc_trigger_recovery(). Perhaps you're just missing some explanation
in the commit message?

Regards,
Bjorn

>       else
> -             ret = rproc_boot_recovery(rproc);
> -
> -unlock_mutex:
> -     mutex_unlock(&rproc->lock);
> -     return ret;
> +             return rproc_boot_recovery(rproc);
>  }
>  
>  /**
> @@ -2057,7 +2050,7 @@ int rproc_detach(struct rproc *rproc)
>               return ret;
>       }
>  
> -     if (rproc->state != RPROC_ATTACHED) {
> +     if (rproc->state != RPROC_ATTACHED && rproc->state != RPROC_CRASHED) {
>               ret = -EINVAL;
>               goto out;
>       }
> -- 
> 2.34.1
> 

Reply via email to