On Tue, 2025-09-23 at 18:12 -0400, Benjamin Marzinski wrote:
> There were two problems with how libmpathpersist handled
> unregistering
> a key while holding the reseravation (which should also release the
> reservation).
> 1. If the path holding the reservation is not unregistered first,
> there
> will be unregistered paths, while a reservation is still held,
> which
> would cause IO to those paths to fail, when it shouldn't.
> 2. If the path that holds the reservation is down, libmpathpersist
> was
> not clearing the resrvation, since the there were no registered
> keys
> it could use for the PREEMPT command workaround
>
> To fix these, libmpathpersist now releases the reservation first when
> trying to unregister a key that is holding the reservation.
> mpath_prout_rel() has a new option so that if it needs to self
> preempt
> to clear the reservation, it won't re-register the paths when called
> as part of unregistering a key. Also, instead of checking if the
> device
> is currently holding a reservation using mpp->reservation_key in
> check_holding_reservation() (which will already be set to 0 when
> called
> as part of unregistering a key), pass in the key to check.
>
> Signed-off-by: Benjamin Marzinski <[email protected]>
> ---
> libmpathpersist/mpath_persist_int.c | 101 +++++++++++++++++++-------
> --
> 1 file changed, 70 insertions(+), 31 deletions(-)
>
> diff --git a/libmpathpersist/mpath_persist_int.c
> b/libmpathpersist/mpath_persist_int.c
> index 9c71011f..f130f5f5 100644
> --- a/libmpathpersist/mpath_persist_int.c
> +++ b/libmpathpersist/mpath_persist_int.c
> @@ -564,18 +564,23 @@ static int mpath_prout_reg(struct multipath
> *mpp,int rq_servact, int rq_scope,
> return status;
> }
>
> +enum preempt_work {
> + PREE_WORK_NONE,
> + PREE_WORK_REL,
> + PREE_WORK_REL_UNREG,
> +};
> /*
> * Called to make a multipath device preempt its own reservation
> (and
> - * optionally release the reservation). Doing this causes the
> reservation
> - * keys to be removed from all the device paths except that path
> used to issue
> - * the preempt, so they need to be restored. To avoid the chance
> that IO
> - * goes to these paths when they don't have a registered key, the
> device
> - * is suspended before issuing the preemption, and the keys are
> reregistered
> - * before resuming it.
> + * optional extra work). Doing this causes the reservation keys to
> be removed
> + * from all the device paths except that path used to issue the
> preempt, so
> + * they may need to be restored. To avoid the chance that IO goes to
> these
> + * paths when they don't have a registered key and a reservation
> exists, the
> + * device is suspended before issuing the preemption, and the keys
> are
> + * reregistered (or the reservation is released) before resuming it.
> */
> static int do_preempt_self(struct multipath *mpp, struct be64
> sa_key,
> int rq_servact, int rq_scope, unsigned
> int rq_type,
> - int noisy, bool do_release)
> + int noisy, enum preempt_work work)
> {
> int status, rel_status = MPATH_PR_SUCCESS;
> struct path *pp = NULL;
> @@ -596,7 +601,7 @@ static int do_preempt_self(struct multipath *mpp,
> struct be64 sa_key,
> goto fail_resume;
> }
>
> - if (do_release) {
> + if (work != PREE_WORK_NONE) {
> memset(¶mp, 0, sizeof(paramp));
> memcpy(paramp.key, &mpp->reservation_key, 8);
> rel_status = prout_do_scsi_ioctl(pp->dev,
> MPATH_PROUT_REL_SA,
> @@ -604,15 +609,26 @@ static int do_preempt_self(struct multipath
> *mpp, struct be64 sa_key,
> if (rel_status != MPATH_PR_SUCCESS)
> condlog(0, "%s: release on alternate path
> failed.",
> mpp->wwid);
> + else if (work == PREE_WORK_REL_UNREG) {
> + /* unregister the last path */
> + rel_status = prout_do_scsi_ioctl(pp->dev,
> +
> MPATH_PROUT_REG_IGN_SA,
> + rq_scope,
> rq_type,
> + ¶mp,
> noisy);
> + if (rel_status != MPATH_PR_SUCCESS)
> + condlog(0, "%s: final self preempt
> unregister failed,",
> + mpp->wwid);
> + }
> + }
> + if (work != PREE_WORK_REL_UNREG) {
> + memset(¶mp, 0, sizeof(paramp));
> + memcpy(paramp.sa_key, &mpp->reservation_key, 8);
> + status = mpath_prout_reg(mpp,
> MPATH_PROUT_REG_IGN_SA, rq_scope,
> + rq_type, ¶mp, noisy);
> + if (status != MPATH_PR_SUCCESS)
> + condlog(0, "%s: self preempt failed to
> reregister paths.",
> + mpp->wwid);
> }
> -
> - memset(¶mp, 0, sizeof(paramp));
> - memcpy(paramp.sa_key, &mpp->reservation_key, 8);
> - status = mpath_prout_reg(mpp, MPATH_PROUT_REG_IGN_SA,
> rq_scope,
> - rq_type, ¶mp, noisy);
> - if (status != MPATH_PR_SUCCESS)
> - condlog(0, "%s: self preempt failed to reregister
> paths.",
> - mpp->wwid);
>
> fail_resume:
> if (!dm_simplecmd_noflush(DM_DEVICE_RESUME, mpp->alias,
> udev_flags)) {
> @@ -625,10 +641,10 @@ fail_resume:
> }
>
> static int preempt_self(struct multipath *mpp, int rq_servact, int
> rq_scope,
> - unsigned int rq_type, int noisy, bool
> do_release)
> + unsigned int rq_type, int noisy, enum
> preempt_work work)
> {
> return do_preempt_self(mpp, mpp->reservation_key,
> rq_servact, rq_scope,
> - rq_type, noisy, do_release);
> + rq_type, noisy, work);
> }
>
> static int preempt_all(struct multipath *mpp, int rq_servact, int
> rq_scope,
> @@ -636,7 +652,7 @@ static int preempt_all(struct multipath *mpp, int
> rq_servact, int rq_scope,
> {
> struct be64 zerokey = {0};
> return do_preempt_self(mpp, zerokey, rq_servact, rq_scope,
> rq_type,
> - noisy, false);
> + noisy, PREE_WORK_NONE);
> }
>
> static int reservation_key_matches(struct multipath *mpp, uint8_t
> *key,
> @@ -661,18 +677,21 @@ static int reservation_key_matches(struct
> multipath *mpp, uint8_t *key,
> return YNU_NO;
> }
>
> -static bool check_holding_reservation(struct multipath *mpp,
> unsigned int *type)
> +static bool check_holding_reservation(struct multipath *mpp, uint8_t
> *curr_key,
> + unsigned int *type)
> {
> - if (get_be64(mpp->reservation_key) &&
> + uint64_t zerokey = 0;
> + if (memcmp(curr_key, &zerokey, 8) != 0 &&
> get_prhold(mpp->alias) == PR_SET &&
> - reservation_key_matches(mpp, (uint8_t *)&mpp-
> >reservation_key, type) == YNU_YES)
> + reservation_key_matches(mpp, curr_key, type) == YNU_YES)
> return true;
> return false;
> }
>
> -static int mpath_prout_rel(struct multipath *mpp,int rq_servact, int
> rq_scope,
> +static int mpath_prout_rel(struct multipath *mpp, int rq_servact,
> int rq_scope,
> unsigned int rq_type,
> - struct prout_param_descriptor * paramp,
> int noisy)
> + struct prout_param_descriptor *paramp,
> int noisy,
> + bool *unregistered)
> {
> int i, j;
> struct pathgroup *pgp = NULL;
> @@ -685,6 +704,8 @@ static int mpath_prout_rel(struct multipath
> *mpp,int rq_servact, int rq_scope,
> bool all_threads_failed;
> unsigned int res_type;
>
> + if (unregistered)
> + *unregistered = false;
> if (!mpp)
> return MPATH_PR_DMMP_ERROR;
>
> @@ -762,7 +783,8 @@ static int mpath_prout_rel(struct multipath
> *mpp,int rq_servact, int rq_scope,
> return status;
> }
>
> - if (!check_holding_reservation(mpp, &res_type)) {
> + if (!check_holding_reservation(mpp, (uint8_t *)&mpp-
> >reservation_key,
> + &res_type)) {
> condlog(2, "%s: Releasing key not holding
> reservation.", mpp->wwid);
> return MPATH_PR_SUCCESS;
> }
> @@ -785,7 +807,10 @@ static int mpath_prout_rel(struct multipath
> *mpp,int rq_servact, int rq_scope,
> * 4. Reregistering keys on all the paths
> * 5. Resuming the device
> */
> - return preempt_self(mpp, MPATH_PROUT_PREE_SA, rq_scope,
> rq_type, noisy, true);
> + if (unregistered)
> + *unregistered = true;
> + return preempt_self(mpp, MPATH_PROUT_PREE_SA, rq_scope,
> rq_type, noisy,
> + unregistered ? PREE_WORK_REL_UNREG :
> PREE_WORK_REL);
This is hard to understand and possibly error-prone in the future.
You use the pointer "unregistered" as a boolean, which looks like a
typo (missing "*") in the first place. It turns out that it's
intentional, and that (unregistered != NULL) indicates that this code
was called from the MPATH_PROUT_REG_IGN_SA case in
do_mpath_persistent_reserve_out() (as opposed to the MPATH_PROUT_REL_SA
case).
Could you write this in a somewhat more obvious way, please; perhaps by
passing a preempt_work parameter to mpath_prout_rel()?
> }
>
> /*
> @@ -828,7 +853,7 @@ int do_mpath_persistent_reserve_out(vector curmp,
> vector pathvec, int fd,
> select_reservation_key(conf, mpp);
> put_multipath_config(conf);
>
> - memcpy(&oldkey, &mpp->reservation_key, 8);
> + oldkey = mpp->reservation_key;
> unregistering = (memcmp(&zerokey, paramp->sa_key, 8) == 0);
> if (mpp->prkey_source == PRKEY_SOURCE_FILE &&
> (rq_servact == MPATH_PROUT_REG_IGN_SA ||
> @@ -905,7 +930,21 @@ int do_mpath_persistent_reserve_out(vector
> curmp, vector pathvec, int fd,
> {
> case MPATH_PROUT_REG_SA:
> case MPATH_PROUT_REG_IGN_SA:
> - ret= mpath_prout_reg(mpp, rq_servact, rq_scope,
> rq_type, paramp, noisy);
> + if (unregistering && check_holding_reservation(mpp,
> (uint8_t *)&oldkey, &rq_type)) {
> + bool unregistered = false;
> + struct be64 newkey = mpp->reservation_key;
> + /* temporarily restore reservation key */
> + mpp->reservation_key = oldkey;
> + ret = mpath_prout_rel(mpp,
> MPATH_PROUT_REL_SA, rq_scope,
> + rq_type, paramp,
> noisy,
> + &unregistered);
> + mpp->reservation_key = newkey;
> + if (ret == MPATH_PR_SUCCESS &&
> !unregistered)
> + ret = mpath_prout_reg(mpp,
> rq_servact, rq_scope,
> + rq_type,
> paramp, noisy);
I comment stating that mpp->reservation_key is zero here and that
mpath_prout_reg() actually unregisters the key would help readers of
this code.
Regards
Martin
> + } else
> + ret = mpath_prout_reg(mpp, rq_servact,
> rq_scope,
> + rq_type, paramp,
> noisy);
> break;
> case MPATH_PROUT_PREE_SA:
> case MPATH_PROUT_PREE_AB_SA:
> @@ -921,7 +960,7 @@ int do_mpath_persistent_reserve_out(vector curmp,
> vector pathvec, int fd,
> /* if we are preempting ourself */
> if (memcmp(paramp->sa_key, paramp->key, 8) == 0) {
> ret = preempt_self(mpp, rq_servact,
> rq_scope, rq_type,
> - noisy, false);
> + noisy, PREE_WORK_NONE);
> break;
> }
> /* fallthrough */
> @@ -932,14 +971,14 @@ int do_mpath_persistent_reserve_out(vector
> curmp, vector pathvec, int fd,
> paramp, noisy, NULL,
> &failed_paths);
> if (rq_servact == MPATH_PROUT_RES_SA &&
> ret != MPATH_PR_SUCCESS && failed_paths &&
> - check_holding_reservation(mpp, &res_type) &&
> + check_holding_reservation(mpp, paramp->key,
> &res_type) &&
> res_type == rq_type)
> /* The reserve failed, but multipathd says
> we hold it */
> ret = MPATH_PR_SUCCESS;
> break;
> }
> case MPATH_PROUT_REL_SA:
> - ret = mpath_prout_rel(mpp, rq_servact, rq_scope,
> rq_type, paramp, noisy);
> + ret = mpath_prout_rel(mpp, rq_servact, rq_scope,
> rq_type, paramp, noisy, NULL);
> break;
> default:
> return MPATH_PR_OTHER;