On Thu, 2025-07-10 at 14:11 -0400, Benjamin Marzinski wrote:
> When a reservation key is changed from one non-zero value to another,
> libmpathpersist checks if the old key is still holding the
> reservation,
> and preempts it if it is. This was only safe if two nodes never share
> the same key. If a node uses the same key as another node that is
> holding the reservation, and switches keys so that it no longer
> matches,
> it will end up preempting the reservation. This is clearly unexpected
> behavior, and it can come about by a simple accident of registering a
> device with the wrong key, and then immediately fixing it.
> 
> To handle this, add code to track if a device is the reservation
> holder
> to multipathd. multipathd now has three new commands "getprhold",
> "setprhold", and "unsetprhold". These commands work like the
> equivalent
> *prstatus commands. libmpathpersist calls setprhold on RESERVE
> commands
> and PREEMPT commands when the preempted key is holding the
> reservation
> and unsetprhold on RELEASE commands. Also, calling unsetprflag causes
> prhold to be unset as well, so CLEAR commands and REGISTER commands
> with
> a 0x0 service action key will also unset prhold. libmpathpersist()
> will
> also unset prhold if it notices that the device cannot be holding a
> reservation in preempt_missing_path().
> 
> When a new multipath device is created, its initial prhold state is
> PR_UNKNOWN until it checks the current reservation, just like with
> prflag. If multipathd ever finds that a device's registration has
> been
> preempted or cleared in update_map_pr(), it unsets prhold, just like
> with prflag.
> 
> Now, before libmpathpersist preempts a reservation when changing keys
> it also checks if multipathd thinks that the device is holding
> the reservation. If it does not, then libmpathpersist won't preempt
> the key. It will assume that another node is holding the reservation
> with the same key.
> 
> I should note that this safety check only stops a node not holding
> the
> reservation from preempting the node holding the reservation. If the
> node holding the reservation changes its key, but it fails to change
> the
> resevation key, because that path is down or gone, it will still
> issue
> the preempt to move the reservation to a usable path, even if another
> node is using the same key. This will remove the registrations for
> that
> other node. It also will not work correctly if multipathd stops
> tracking
> a device for some reason. It's only a best-effort safety check.
> 
> Signed-off-by: Benjamin Marzinski <bmarz...@redhat.com>
> ---
>  libmpathpersist/mpath_persist_int.c | 87 ++++++++++++++++++++++++---
> --
>  libmpathpersist/mpath_updatepr.c    | 19 ++++++-
>  libmpathpersist/mpathpr.h           |  2 +
>  libmultipath/structs.h              |  1 +
>  multipathd/callbacks.c              |  3 +
>  multipathd/cli.c                    |  4 +-
>  multipathd/cli.h                    |  3 +
>  multipathd/cli_handlers.c           | 48 ++++++++++++++++
>  multipathd/main.c                   | 33 ++++++++++-
>  9 files changed, 182 insertions(+), 18 deletions(-)
> 

Two questions all the way down.


> diff --git a/libmpathpersist/mpath_persist_int.c
> b/libmpathpersist/mpath_persist_int.c
> index ae0defc2..91f0d018 100644
> --- a/libmpathpersist/mpath_persist_int.c
> +++ b/libmpathpersist/mpath_persist_int.c
> @@ -260,6 +260,10 @@ static int mpath_prout_common(struct multipath
> *mpp,int rq_servact, int rq_scope
>   * holding the reservation on a path that couldn't get its key
> updated,
>   * either because it is down or no longer part of the multipath
> device,
>   * you need to preempt the reservation to a usable path with the new
> key
> + *
> + * Also, it's possible that the reservation was preempted, and the
> device
> + * is being re-registered. If it appears that is the case, clear
> + * mpp->prhold in multipathd.
>   */
>  void preempt_missing_path(struct multipath *mpp, uint8_t *key,
> uint8_t *sa_key,
>                         int noisy)
> @@ -272,12 +276,19 @@ void preempt_missing_path(struct multipath
> *mpp, uint8_t *key, uint8_t *sa_key,
>       int status;
>  
>       /*
> -      * If you previously didn't have a key registered or you
> didn't
> -      * switch to a different key, there's no need to preempt.
> Also, you
> -      * can't preempt if you no longer have a registered key
> +      * If you previously didn't have a key registered, you can't
> +      * be holding the reservation. Also, you can't preempt if
> you
> +      * no longer have a registered key
>        */
> -     if (memcmp(key, zero, 8) == 0 || memcmp(sa_key, zero, 8) ==
> 0 ||
> -         memcmp(key, sa_key, 8) == 0)
> +     if (memcmp(key, zero, 8) == 0 || memcmp(sa_key, zero, 8) ==
> 0) {
> +             update_prhold(mpp->alias, false);
> +             return;
> +     }
> +     /*
> +      * If you didn't switch to a different key, there is no need
> to
> +      * preempt.
> +      */
> +     if (memcmp(key, sa_key, 8) == 0)
>               return;
>  
>       status = mpath_prin_activepath(mpp, MPATH_PRIN_RRES_SA,
> &resp, noisy);
> @@ -286,13 +297,29 @@ void preempt_missing_path(struct multipath
> *mpp, uint8_t *key, uint8_t *sa_key,
>               return;
>       }
>       /* If there is no reservation, there's nothing to preempt */
> -     if (!resp.prin_descriptor.prin_readresv.additional_length)
> +     if (!resp.prin_descriptor.prin_readresv.additional_length) {
> +             update_prhold(mpp->alias, false);
>               return;
> +     }
>       /*
>        * If there reservation is not held by the old key, you
> don't
>        * want to preempt it
>        */
> -     if (memcmp(key, resp.prin_descriptor.prin_readresv.key, 8)
> != 0)
> +     if (memcmp(key, resp.prin_descriptor.prin_readresv.key, 8)
> != 0) {
> +             /*
> +              * If reseravation key doesn't match either the old
> or
> +              * the new key, then clear prhold.
> +              */
> +             if (memcmp(sa_key,
> resp.prin_descriptor.prin_readresv.key, 8) != 0)
> +                     update_prhold(mpp->alias, false);
> +             return;
> +     }
> +
> +     /*
> +      * If multipathd doesn't think it is holding the
> reservation, don't
> +      * preempt it
> +      */
> +     if (get_prhold(mpp->alias) != PR_SET)
>               return;
>       /* Assume this key is being held by an inaccessable path on
> this
>        * node. libmpathpersist has never worked if multiple nodes
> share
> @@ -640,19 +667,38 @@ fail_resume:
>       return (status == MPATH_PR_RETRYABLE_ERROR) ? MPATH_PR_OTHER
> : status;
>  }
>  
> +static int reservation_key_matches(struct multipath *mpp, uint8_t
> *key,
> +                                int noisy)
> +{
> +     struct prin_resp resp = {0};
> +     int status;
> +
> +     status = mpath_prin_activepath(mpp, MPATH_PRIN_RRES_SA,
> &resp, noisy);
> +     if (status != MPATH_PR_SUCCESS) {
> +             condlog(0, "%s: pr in read reservation command
> failed.",
> +                     mpp->wwid);
> +             return YNU_UNDEF;
> +     }
> +     if (!resp.prin_descriptor.prin_readresv.additional_length)
> +             return YNU_NO;
> +     if (memcmp(key, resp.prin_descriptor.prin_readresv.key, 8)
> == 0)
> +             return YNU_YES;
> +     return YNU_NO;
> +}
> +
>  /*
>   * for MPATH_PROUT_REG_IGN_SA, we use the ignored paramp->key to
> store the
> - * currently registered key.
> + * currently registered key for use in preempt_missing_path(), but
> only if
> + * the key is holding the reservation.
>   */
>  static void set_ignored_key(struct multipath *mpp, uint8_t *key)
>  {
>       memset(key, 0, 8);
>       if (!get_be64(mpp->reservation_key))
>               return;
> -     if (get_prflag(mpp->alias) == PR_UNSET)
> +     if (get_prhold(mpp->alias) == PR_UNSET)
>               return;
> -     update_map_pr(mpp, NULL);
> -     if (mpp->prflag != PR_SET)
> +     if (reservation_key_matches(mpp, key, 0) == YNU_NO)
>               return;
>       memcpy(key, &mpp->reservation_key, 8);
>  }
> @@ -665,6 +711,7 @@ int do_mpath_persistent_reserve_out(vector curmp,
> vector pathvec, int fd,
>       int ret;
>       uint64_t prkey;
>       struct config *conf;
> +     bool preempting_reservation = false;
>  
>       ret = mpath_get_map(curmp, pathvec, fd, &mpp);
>       if (ret != MPATH_PR_SUCCESS)
> @@ -715,9 +762,12 @@ int do_mpath_persistent_reserve_out(vector
> curmp, vector pathvec, int fd,
>       case MPATH_PROUT_REG_IGN_SA:
>               ret= mpath_prout_reg(mpp, rq_servact, rq_scope,
> rq_type, paramp, noisy);
>               break;
> -     case MPATH_PROUT_RES_SA :
> -     case MPATH_PROUT_PREE_SA :
> -     case MPATH_PROUT_PREE_AB_SA :
> +     case MPATH_PROUT_PREE_SA:
> +     case MPATH_PROUT_PREE_AB_SA:
> +             if (reservation_key_matches(mpp, paramp->sa_key,
> noisy) == YNU_YES)
> +                     preempting_reservation = true;
> +             /* fallthrough */
> +     case MPATH_PROUT_RES_SA:
>       case MPATH_PROUT_CLEAR_SA:
>               ret = mpath_prout_common(mpp, rq_servact, rq_scope,
> rq_type,
>                                        paramp, noisy, NULL);
> @@ -744,6 +794,15 @@ int do_mpath_persistent_reserve_out(vector
> curmp, vector pathvec, int fd,
>       case MPATH_PROUT_CLEAR_SA:
>               update_prflag(mpp->alias, 0);
>               update_prkey(mpp->alias, 0);
> +             break;
> +     case MPATH_PROUT_RES_SA:
> +     case MPATH_PROUT_REL_SA:
> +             update_prhold(mpp->alias, (rq_servact ==
> MPATH_PROUT_RES_SA));
> +             break;
> +     case MPATH_PROUT_PREE_SA:
> +     case MPATH_PROUT_PREE_AB_SA:
> +             if (preempting_reservation)
> +                     update_prhold(mpp->alias, 1);
>       }
>       return ret;
>  }
> diff --git a/libmpathpersist/mpath_updatepr.c
> b/libmpathpersist/mpath_updatepr.c
> index c8fbe4a2..dd8dd48e 100644
> --- a/libmpathpersist/mpath_updatepr.c
> +++ b/libmpathpersist/mpath_updatepr.c
> @@ -75,13 +75,13 @@ static int do_update_pr(char *alias, char *cmd,
> char *key)
>       return ret;
>  }
>  
> -int get_prflag(char *mapname)
> +static int do_get_pr(char *mapname, const char *cmd)
>  {
>       char str[256];
>       char *reply;
>       int prflag;
>  
> -     snprintf(str, sizeof(str), "getprstatus map %s", mapname);
> +     snprintf(str, sizeof(str), "%s map %s", cmd, mapname);
>       reply = do_pr(mapname, str);
>       if (!reply)
>               prflag = PR_UNKNOWN;
> @@ -96,11 +96,26 @@ int get_prflag(char *mapname)
>       return prflag;
>  }
>  
> +int get_prflag(char *mapname)
> +{
> +     return do_get_pr(mapname, "getprstatus");
> +}
> +
> +int get_prhold(char *mapname)
> +{
> +     return do_get_pr(mapname, "getprhold");
> +}
> +
>  int update_prflag(char *mapname, int set) {
>       return do_update_pr(mapname, (set)? "setprstatus" :
> "unsetprstatus",
>                           NULL);
>  }
>  
> +int update_prhold(char *mapname, bool set)
> +{
> +     return do_update_pr(mapname, (set) ? "setprhold" :
> "unsetprhold", NULL);
> +}
> +
>  int update_prkey_flags(char *mapname, uint64_t prkey, uint8_t
> sa_flags) {
>       char str[256];
>  
> diff --git a/libmpathpersist/mpathpr.h b/libmpathpersist/mpathpr.h
> index 912957e5..99d6b82f 100644
> --- a/libmpathpersist/mpathpr.h
> +++ b/libmpathpersist/mpathpr.h
> @@ -9,6 +9,8 @@
>  int update_prflag(char *mapname, int set);
>  int update_prkey_flags(char *mapname, uint64_t prkey, uint8_t
> sa_flags);
>  int get_prflag(char *mapname);
> +int get_prhold(char *mapname);
> +int update_prhold(char *mapname, bool set);
>  #define update_prkey(mapname, prkey) update_prkey_flags(mapname,
> prkey, 0)
>  
>  #endif
> diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> index 6c427d6f..97093675 100644
> --- a/libmultipath/structs.h
> +++ b/libmultipath/structs.h
> @@ -522,6 +522,7 @@ struct multipath {
>       struct be64 reservation_key;
>       uint8_t sa_flags;
>       int prflag;
> +     int prhold;
>       int all_tg_pt;
>       struct gen_multipath generic_mp;
>       bool fpin_must_reload;
> diff --git a/multipathd/callbacks.c b/multipathd/callbacks.c
> index fb87b280..b6b57f45 100644
> --- a/multipathd/callbacks.c
> +++ b/multipathd/callbacks.c
> @@ -69,4 +69,7 @@ void init_handler_callbacks(void)
>       set_handler_callback(VRB_UNSETMARGINAL | Q1_PATH,
> HANDLER(cli_unset_marginal));
>       set_handler_callback(VRB_UNSETMARGINAL | Q1_MAP,
>                            HANDLER(cli_unset_all_marginal));
> +     set_handler_callback(VRB_GETPRHOLD | Q1_MAP,
> HANDLER(cli_getprhold));
> +     set_handler_callback(VRB_SETPRHOLD | Q1_MAP,
> HANDLER(cli_setprhold));
> +     set_handler_callback(VRB_UNSETPRHOLD | Q1_MAP,
> HANDLER(cli_unsetprhold));
>  }
> diff --git a/multipathd/cli.c b/multipathd/cli.c
> index 34588290..d0e6cebc 100644
> --- a/multipathd/cli.c
> +++ b/multipathd/cli.c
> @@ -224,7 +224,9 @@ load_keys (void)
>       r += add_key(keys, "setmarginal", VRB_SETMARGINAL, 0);
>       r += add_key(keys, "unsetmarginal", VRB_UNSETMARGINAL, 0);
>       r += add_key(keys, "all", KEY_ALL, 0);
> -
> +     r += add_key(keys, "getprhold", VRB_GETPRHOLD, 0);
> +     r += add_key(keys, "setprhold", VRB_SETPRHOLD, 0);
> +     r += add_key(keys, "unsetprhold", VRB_UNSETPRHOLD, 0);
>  
>       if (r) {
>               free_keys(keys);
> diff --git a/multipathd/cli.h b/multipathd/cli.h
> index 9fa50145..5a943a45 100644
> --- a/multipathd/cli.h
> +++ b/multipathd/cli.h
> @@ -38,6 +38,9 @@ enum {
>       VRB_UNSETMARGINAL       = 23,
>       VRB_SHUTDOWN            = 24,
>       VRB_QUIT                = 25,
> +     VRB_GETPRHOLD           = 26,
> +     VRB_SETPRHOLD           = 27,
> +     VRB_UNSETPRHOLD         = 28,
>  
>       /* Qualifiers, values must be different from verbs */
>       KEY_PATH                = 65,
> diff --git a/multipathd/cli_handlers.c b/multipathd/cli_handlers.c
> index a92b07ce..34910c9b 100644
> --- a/multipathd/cli_handlers.c
> +++ b/multipathd/cli_handlers.c
> @@ -1314,6 +1314,10 @@ cli_unsetprstatus(void * v, struct strbuf
> *reply, void * data)
>               mpp->prflag = PR_UNSET;
>               condlog(2, "%s: prflag unset", param);
>       }
> +     if (mpp->prhold != PR_UNSET) {
> +             mpp->prhold = PR_UNSET;
> +             condlog(2, "%s: prhold unset (by clearing prflag)",
> param);
> +     }
>  
>       return 0;
>  }
> @@ -1401,6 +1405,50 @@ cli_setprkey(void * v, struct strbuf *reply,
> void * data)
>       return ret;
>  }
>  
> +static int do_prhold(struct vectors *vecs, char *param, int prhold)
> +{
> +     struct multipath *mpp = find_mp_by_str(vecs->mpvec, param);
> +
> +     if (!mpp)
> +             return -ENODEV;
> +
> +     if (mpp->prhold != prhold) {
> +             mpp->prhold = prhold;
> +             condlog(2, "%s: prhold %s", param, pr_str[prhold]);
> +     }
> +
> +     return 0;
> +}
> +
> +static int cli_setprhold(void *v, struct strbuf *reply, void *data)
> +{
> +     return do_prhold((struct vectors *)data, get_keyparam(v,
> KEY_MAP),
> +                      PR_SET);
> +}
> +
> +static int cli_unsetprhold(void *v, struct strbuf *reply, void
> *data)
> +{
> +     return do_prhold((struct vectors *)data, get_keyparam(v,
> KEY_MAP),
> +                      PR_UNSET);
> +}
> +
> +static int cli_getprhold(void *v, struct strbuf *reply, void *data)
> +{
> +     struct multipath *mpp;
> +     struct vectors *vecs = (struct vectors *)data;
> +     char *param = get_keyparam(v, KEY_MAP);
> +
> +     param = convert_dev(param, 0);
> +
> +     mpp = find_mp_by_str(vecs->mpvec, param);
> +     if (!mpp)
> +             return -ENODEV;
> +
> +     append_strbuf_str(reply, pr_str[mpp->prhold]);
> +     condlog(3, "%s: reply = %s", param, get_strbuf_str(reply));
> +     return 0;
> +}
> +
>  static int cli_set_marginal(void * v, struct strbuf *reply, void *
> data)
>  {
>       struct vectors * vecs = (struct vectors *)data;
> diff --git a/multipathd/main.c b/multipathd/main.c
> index 582af880..d22425f6 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -4222,6 +4222,32 @@ main (int argc, char *argv[])
>               return (child(NULL));
>  }
>  
> +static void check_prhold(struct multipath *mpp, struct path *pp)
> +{
> +     struct prin_resp resp = {0};
> +     int status;
> +
> +     if (mpp->prflag == PR_UNSET) {
> +             mpp->prhold = PR_UNSET;
> +             return;
> +     }
> +     if (mpp->prflag != PR_SET || mpp->prhold != PR_UNKNOWN)
> +             return;

How can this situation come to pass? prflag must be PR_UNKNOWN. 
Shouldn't we reset prhold to PR_UNKNOWN as well?

> +
> +     status = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RRES_SA,
> &resp, 0);
> +     if (status != MPATH_PR_SUCCESS) {
> +             condlog (0, "%s: pr in read reservation command
> failed: %d",
> +                      mpp->wwid, status);
> +             return;
> +     }
> +     mpp->prhold = PR_UNSET;
> +     if (!resp.prin_descriptor.prin_readresv.additional_length)
> +             return;
> +
> +     if (memcmp(&mpp->reservation_key,
> resp.prin_descriptor.prin_readresv.key, 8) == 0)
> +             mpp->prhold = PR_SET;
> +}
> +
>  static void mpath_pr_event_handle(struct path *pp)
>  {
>       struct multipath *mpp = pp->mpp;
> @@ -4233,8 +4259,13 @@ static void mpath_pr_event_handle(struct path
> *pp)
>               return;
>       }
>  
> -     if (update_map_pr(mpp, pp) != MPATH_PR_SUCCESS)
> +     if (update_map_pr(mpp, pp) != MPATH_PR_SUCCESS) {
> +             if (mpp->prflag == PR_UNSET)
> +                     mpp->prhold = PR_UNSET;

Perhaps we should move setting prhold to update_map_pr()?

Martin

>               return;
> +     }
> +
> +     check_prhold(mpp, pp);
>  
>       if (mpp->prflag != PR_SET)
>               return;

Reply via email to