The workaround for releasing a reservation held by a failed path
(clearing all persistent reservation data and then reregistering all the
keys) has several problems. It requires devices that support the READ
FULL STATUS command and are capable of specifying TransportIDs with
REGISTRATION commands (SIP_C), neither of which are mandatory to support
SCSI Persistent Reservations. Non SIP_C devices will be left with no
registered keys. Also, not all cleared keys are registered, just the
ones going to the Target Port receiving the REGISTRATION command. To
reregister all the keys, the code would have to also use the "All Target
Ports" flag to register keys on different Target Ports, but this could
end up registering keys on paths they aren't supposed to be on (for
instance if one of the registered keys was only there because the path
was down and it couldn't be released).

The redesign avoids these issues by only using mandatory Persistent
Reservation commands, without extra optional parameters or flags, and
only effects the keys of the multipath device it is being issued on.
The new workaround is:
1. Suspend the multipath device to prevent I/O
2. Preempt the key to move the reservation to an available path. This
   also removes the registered keys from every path except the path
   issuing the PREEMPT command. Since the device is suspended, not I/O
   can go to these unregisted paths and fail.
3. Release the reservation on the path that now holds it.
4. Resume the device (since it no longer matters that most of the paths
   no longer have a registered key)
5. Reregister the keys on all the paths.

If steps 3 or 4 fail, the code will attempt to reregister the keys, and
then attempt (or possibly re-attempt) the resume.

Signed-off-by: Benjamin Marzinski <bmarz...@redhat.com>
---
 libmpathpersist/mpath_persist_int.c | 173 ++++++++++++----------------
 libmultipath/libmultipath.version   |   1 +
 2 files changed, 76 insertions(+), 98 deletions(-)

diff --git a/libmpathpersist/mpath_persist_int.c 
b/libmpathpersist/mpath_persist_int.c
index 13829742..7fb08b2e 100644
--- a/libmpathpersist/mpath_persist_int.c
+++ b/libmpathpersist/mpath_persist_int.c
@@ -366,7 +366,8 @@ static int mpath_prout_reg(struct multipath *mpp,int 
rq_servact, int rq_scope,
 
 static int mpath_prout_common(struct multipath *mpp,int rq_servact, int 
rq_scope,
                              unsigned int rq_type,
-                             struct prout_param_descriptor* paramp, int noisy)
+                             struct prout_param_descriptor* paramp, int noisy,
+                             struct path **pptr)
 {
        int i,j, ret;
        struct pathgroup *pgp = NULL;
@@ -385,6 +386,8 @@ static int mpath_prout_common(struct multipath *mpp,int 
rq_servact, int rq_scope
                        found = true;
                        ret = prout_do_scsi_ioctl(pp->dev, rq_servact, rq_scope,
                                                  rq_type, paramp, noisy);
+                       if (ret == MPATH_PR_SUCCESS && pptr)
+                               *pptr = pp;
                        if (ret != MPATH_PR_RETRYABLE_ERROR)
                                return ret;
                }
@@ -405,14 +408,12 @@ static int mpath_prout_rel(struct multipath *mpp,int 
rq_servact, int rq_scope,
        struct path *pp = NULL;
        int active_pathcount = 0;
        pthread_attr_t attr;
-       int rc, found = 0;
+       int rc;
        int count = 0;
        int status = MPATH_PR_SUCCESS;
-       struct prin_resp resp;
-       struct prout_param_descriptor *pamp = NULL;
-       struct prin_resp *pr_buff;
-       int length;
-       struct transportid *pptr = NULL;
+       struct prin_resp resp = {0};
+       uint16_t udev_flags = (mpp->skip_kpartx) ? MPATH_UDEV_NO_KPARTX_FLAG : 
0;
+       bool did_resume = false;
 
        if (!mpp)
                return MPATH_PR_DMMP_ERROR;
@@ -502,104 +503,78 @@ static int mpath_prout_rel(struct multipath *mpp,int 
rq_servact, int rq_scope,
        }
 
        condlog (2, "%s: Path holding reservation is not available.", 
mpp->wwid);
-
-       pr_buff =  mpath_alloc_prin_response(MPATH_PRIN_RFSTAT_SA);
-       if (!pr_buff){
-               condlog (0, "%s: failed to  alloc pr in response buffer.", 
mpp->wwid);
+       /*
+        * Cannot free the reservation because the path that is holding it
+        * is not usable. Workaround this by:
+        * 1. Suspending the device
+        * 2. Preempting the reservation to move it to a usable path
+        *    (this removes the registered keys on all paths except the
+        *    preempting one. Since the device is suspended, no IO can
+        *    go to these unregistered paths and fail).
+        * 3. Releasing the reservation on the path that now holds it.
+        * 4. Resuming the device (since it no longer matters that most of
+        *    that paths no longer have a registered key)
+        * 5. Reregistering keys on all the paths
+        */
+
+       if (!dm_simplecmd_noflush(DM_DEVICE_SUSPEND, mpp->alias, 0)) {
+               condlog(0, "%s: release: failed to suspend dm device.", 
mpp->wwid);
                return MPATH_PR_OTHER;
        }
 
-       status = mpath_prin_activepath (mpp, MPATH_PRIN_RFSTAT_SA, pr_buff, 
noisy);
-
-       if (status != MPATH_PR_SUCCESS){
-               condlog (0,  "%s: pr in read full status command failed.",  
mpp->wwid);
-               goto out;
-       }
-
-       num = pr_buff->prin_descriptor.prin_readfd.number_of_descriptor;
-       if (0 == num){
-               goto out;
-       }
-       length = sizeof (struct prout_param_descriptor) + (sizeof (struct 
transportid *));
-
-       pamp = (struct prout_param_descriptor *)malloc (length);
-       if (!pamp){
-               condlog (0, "%s: failed to alloc pr out parameter.", mpp->wwid);
-               goto out;
-       }
-
-       memset(pamp, 0, length);
-
-       pamp->trnptid_list[0] = (struct transportid *) malloc (sizeof (struct 
transportid));
-       if (!pamp->trnptid_list[0]){
-               condlog (0, "%s: failed to alloc pr out transportid.", 
mpp->wwid);
-               goto out1;
-       }
-       pptr = pamp->trnptid_list[0];
-
-       if (get_be64(mpp->reservation_key)){
-               memcpy (pamp->key, &mpp->reservation_key, 8);
-               condlog (3, "%s: reservation key set.", mpp->wwid);
+       memset(paramp, 0, sizeof(*paramp));
+       memcpy(paramp->key, &mpp->reservation_key, 8);
+       memcpy(paramp->sa_key, &mpp->reservation_key, 8);
+       status = mpath_prout_common(mpp, MPATH_PROUT_PREE_SA, rq_scope, rq_type,
+                                   paramp, noisy, &pp);
+       if (status != MPATH_PR_SUCCESS) {
+               condlog(0, "%s: release: pr preempt command failed.", 
mpp->wwid);
+               goto fail_resume;
        }
 
-       status = mpath_prout_common (mpp, MPATH_PROUT_CLEAR_SA,
-                                    rq_scope, rq_type, pamp, noisy);
-
-       if (status) {
-               condlog(0, "%s: failed to send CLEAR_SA", mpp->wwid);
-               goto out2;
+       memset(paramp, 0, sizeof(*paramp));
+       memcpy(paramp->key, &mpp->reservation_key, 8);
+       status = prout_do_scsi_ioctl(pp->dev, MPATH_PROUT_REL_SA, rq_scope,
+                                    rq_type, paramp, noisy);
+       if (status != MPATH_PR_SUCCESS) {
+               condlog(0, "%s: release on alternate path failed.", mpp->wwid);
+               goto out_reregister;
        }
 
-       pamp->num_transportid = 1;
-
-       for (i = 0; i < num; i++){
-               if (get_be64(mpp->reservation_key) &&
-                       
memcmp(pr_buff->prin_descriptor.prin_readfd.descriptors[i]->key,
-                              &mpp->reservation_key, 8)){
-                       /*register with transport id*/
-                       memset(pamp, 0, length);
-                       pamp->trnptid_list[0] = pptr;
-                       memset (pamp->trnptid_list[0], 0, sizeof (struct 
transportid));
-                       memcpy (pamp->sa_key,
-                                       
pr_buff->prin_descriptor.prin_readfd.descriptors[i]->key, 8);
-                       pamp->sa_flags = MPATH_F_SPEC_I_PT_MASK;
-                       pamp->num_transportid = 1;
-
-                       memcpy (pamp->trnptid_list[0],
-                                       
&pr_buff->prin_descriptor.prin_readfd.descriptors[i]->trnptid,
-                                       sizeof (struct transportid));
-                       status = mpath_prout_common (mpp, MPATH_PROUT_REG_SA, 
0, rq_type,
-                                       pamp, noisy);
-
-                       pamp->sa_flags = 0;
-                       memcpy (pamp->key, 
pr_buff->prin_descriptor.prin_readfd.descriptors[i]->key, 8);
-                       memset (pamp->sa_key, 0, 8);
-                       pamp->num_transportid = 0;
-                       status = mpath_prout_common (mpp, MPATH_PROUT_REG_SA, 
0, rq_type,
-                                       pamp, noisy);
-               }
-               else
-               {
-                       if (get_be64(mpp->reservation_key))
-                               found = 1;
-               }
-
-
-       }
-
-       if (found){
-               memset (pamp, 0, length);
-               memcpy (pamp->sa_key, &mpp->reservation_key, 8);
-               memset (pamp->key, 0, 8);
-               status = mpath_prout_reg(mpp, MPATH_PROUT_REG_SA, rq_scope, 
rq_type, pamp, noisy);
+       if (!dm_simplecmd_noflush(DM_DEVICE_RESUME, mpp->alias, udev_flags)) {
+               condlog(0, "%s release: failed to resume dm device.", 
mpp->wwid);
+               /*
+                * leave status set to MPATH_PR_SUCCESS, we will have another
+                * chance to resume the device.
+                */
+               goto out_reregister;
+       }
+       did_resume = true;
+
+out_reregister:
+       memset(paramp, 0, sizeof(*paramp));
+       memcpy(paramp->sa_key, &mpp->reservation_key, 8);
+       rc = mpath_prout_reg(mpp, MPATH_PROUT_REG_IGN_SA, rq_scope, rq_type,
+                            paramp, noisy);
+       if (rc != MPATH_PR_SUCCESS)
+               condlog(0, "%s: release: failed to reregister paths.", 
mpp->wwid);
+
+       /*
+        * If we failed releasing the reservation or resuming earlier
+        * try resuming now. Otherwise, return with the reregistering status
+        * This means we will report failure, even though the resevation
+        * has been released, since the keys were not reregistered.
+        */
+       if (did_resume)
+               return rc;
+       else if (status == MPATH_PR_SUCCESS)
+               status = rc;
+fail_resume:
+       if (!dm_simplecmd_noflush(DM_DEVICE_RESUME, mpp->alias, udev_flags)) {
+               condlog(0, "%s: release: failed to resume dm device.", 
mpp->wwid);
+               if (status == MPATH_PR_SUCCESS)
+                       status = MPATH_PR_OTHER;
        }
-
-out2:
-       free(pptr);
-out1:
-       free (pamp);
-out:
-       free (pr_buff);
        return (status == MPATH_PR_RETRYABLE_ERROR) ? MPATH_PR_OTHER : status;
 }
 
@@ -620,6 +595,7 @@ int do_mpath_persistent_reserve_out(vector curmp, vector 
pathvec, int fd,
        mpp->mpe = find_mpe(conf->mptable, mpp->wwid);
        select_reservation_key(conf, mpp);
        select_all_tg_pt(conf, mpp);
+       select_skip_kpartx(conf, mpp);
        put_multipath_config(conf);
 
        memcpy(&prkey, paramp->sa_key, 8);
@@ -661,7 +637,8 @@ int do_mpath_persistent_reserve_out(vector curmp, vector 
pathvec, int fd,
        case MPATH_PROUT_PREE_SA :
        case MPATH_PROUT_PREE_AB_SA :
        case MPATH_PROUT_CLEAR_SA:
-               ret = mpath_prout_common(mpp, rq_servact, rq_scope, rq_type, 
paramp, noisy);
+               ret = mpath_prout_common(mpp, rq_servact, rq_scope, rq_type,
+                                        paramp, noisy, NULL);
                break;
        case MPATH_PROUT_REL_SA:
                ret = mpath_prout_rel(mpp, rq_servact, rq_scope, rq_type, 
paramp, noisy);
diff --git a/libmultipath/libmultipath.version 
b/libmultipath/libmultipath.version
index a6718355..2f47b3ad 100644
--- a/libmultipath/libmultipath.version
+++ b/libmultipath/libmultipath.version
@@ -249,4 +249,5 @@ local:
 LIBMULTIPATH_29.1.0 {
 global:
        can_recheck_wwid;
+       select_skip_kpartx;
 } LIBMULTIPATH_29.0.0;
-- 
2.48.1


Reply via email to