This option lets multipath set a scsi disk's max_retries sysfs value.
Setting this can be helpful for cases where the path checker succeeds,
but IO commands hang and time out. By default, the SCSI layer will retry
IOs 5 times. Reducing this value will allow multipath to retry the IO
down another path sooner.

Signed-off-by: Benjamin Marzinski <bmarz...@redhat.com>
---
 libmultipath/config.c         |  3 +++
 libmultipath/config.h         |  3 +++
 libmultipath/dict.c           | 34 ++++++++++++++++++++++++++++
 libmultipath/discovery.c      | 42 ++++++++++++++++++++++++++++++++++-
 libmultipath/propsel.c        | 18 +++++++++++++++
 libmultipath/propsel.h        |  1 +
 libmultipath/structs.h        |  7 ++++++
 multipath/multipath.conf.5.in | 22 ++++++++++++++++++
 8 files changed, 129 insertions(+), 1 deletion(-)

diff --git a/libmultipath/config.c b/libmultipath/config.c
index b7dbc6f5..9d90f512 100644
--- a/libmultipath/config.c
+++ b/libmultipath/config.c
@@ -420,6 +420,7 @@ merge_pce(struct pcentry *dst, struct pcentry *src)
        merge_num(fast_io_fail);
        merge_num(dev_loss);
        merge_num(eh_deadline);
+       merge_num(max_retries);
 }
 
 static void
@@ -448,6 +449,7 @@ merge_hwe (struct hwentry * dst, struct hwentry * src)
        merge_num(fast_io_fail);
        merge_num(dev_loss);
        merge_num(eh_deadline);
+       merge_num(max_retries);
        merge_num(user_friendly_names);
        merge_num(retain_hwhandler);
        merge_num(detect_prio);
@@ -615,6 +617,7 @@ store_hwe (vector hwtable, struct hwentry * dhwe)
        hwe->fast_io_fail = dhwe->fast_io_fail;
        hwe->dev_loss = dhwe->dev_loss;
        hwe->eh_deadline = dhwe->eh_deadline;
+       hwe->max_retries = dhwe->max_retries;
        hwe->user_friendly_names = dhwe->user_friendly_names;
        hwe->retain_hwhandler = dhwe->retain_hwhandler;
        hwe->detect_prio = dhwe->detect_prio;
diff --git a/libmultipath/config.h b/libmultipath/config.h
index 8c22ce75..197a567f 100644
--- a/libmultipath/config.h
+++ b/libmultipath/config.h
@@ -47,6 +47,7 @@ struct pcentry {
        int fast_io_fail;
        unsigned int dev_loss;
        int eh_deadline;
+       int max_retries;
 };
 
 struct hwentry {
@@ -72,6 +73,7 @@ struct hwentry {
        int fast_io_fail;
        unsigned int dev_loss;
        int eh_deadline;
+       int max_retries;
        int user_friendly_names;
        int retain_hwhandler;
        int detect_prio;
@@ -162,6 +164,7 @@ struct config {
        int fast_io_fail;
        unsigned int dev_loss;
        int eh_deadline;
+       int max_retries;
        int log_checker_err;
        int allow_queueing;
        int allow_usb_devices;
diff --git a/libmultipath/dict.c b/libmultipath/dict.c
index 044067af..fc438947 100644
--- a/libmultipath/dict.c
+++ b/libmultipath/dict.c
@@ -1152,6 +1152,36 @@ declare_hw_snprint(eh_deadline, print_undef_off_zero)
 declare_pc_handler(eh_deadline, set_undef_off_zero)
 declare_pc_snprint(eh_deadline, print_undef_off_zero)
 
+static int
+set_max_retries(vector strvec, void *ptr, const char *file, int line_nr)
+{ /* config handler for max_retries; the result is stored through ptr */
+       char * buff;
+       int *int_ptr = (int *)ptr;
+
+       buff = set_value(strvec);
+       if (!buff)
+               return 1; /* set_value() gave us nothing to parse */
+
+       if (strcmp(buff, "off") == 0)
+               *int_ptr = UOZ_OFF;
+       else if (strcmp(buff, "0") == 0) /* "0" is matched as a string */
+               *int_ptr = UOZ_ZERO;
+       else /* 1 and 5 are presumably the accepted min/max - confirm */
+               do_set_int(strvec, int_ptr, 1, 5, file, line_nr, buff);
+
+       free(buff);
+       return 0; /* NOTE(review): do_set_int() result is not checked */
+}
+
+declare_def_handler(max_retries, set_max_retries)
+declare_def_snprint(max_retries, print_undef_off_zero)
+declare_ovr_handler(max_retries, set_max_retries)
+declare_ovr_snprint(max_retries, print_undef_off_zero)
+declare_hw_handler(max_retries, set_max_retries)
+declare_hw_snprint(max_retries, print_undef_off_zero)
+declare_pc_handler(max_retries, set_max_retries)
+declare_pc_snprint(max_retries, print_undef_off_zero)
+
 static int
 set_pgpolicy(vector strvec, void *ptr, const char *file, int line_nr)
 {
@@ -2079,6 +2109,7 @@ init_keywords(vector keywords)
        install_keyword("fast_io_fail_tmo", &def_fast_io_fail_handler, &snprint_def_fast_io_fail);
        install_keyword("dev_loss_tmo", &def_dev_loss_handler, &snprint_def_dev_loss);
        install_keyword("eh_deadline", &def_eh_deadline_handler, &snprint_def_eh_deadline);
+       install_keyword("max_retries", &def_max_retries_handler, &snprint_def_max_retries);
        install_keyword("bindings_file", &deprecated_bindings_file_handler, &snprint_deprecated);
        install_keyword("wwids_file", &deprecated_wwids_file_handler, &snprint_deprecated);
        install_keyword("prkeys_file", &deprecated_prkeys_file_handler, &snprint_deprecated);
@@ -2176,6 +2207,7 @@ init_keywords(vector keywords)
        install_keyword("fast_io_fail_tmo", &hw_fast_io_fail_handler, &snprint_hw_fast_io_fail);
        install_keyword("dev_loss_tmo", &hw_dev_loss_handler, &snprint_hw_dev_loss);
        install_keyword("eh_deadline", &hw_eh_deadline_handler, &snprint_hw_eh_deadline);
+       install_keyword("max_retries", &hw_max_retries_handler, &snprint_hw_max_retries);
        install_keyword("user_friendly_names", &hw_user_friendly_names_handler, &snprint_hw_user_friendly_names);
        install_keyword("retain_attached_hw_handler", &hw_retain_hwhandler_handler, &snprint_hw_retain_hwhandler);
        install_keyword("detect_prio", &hw_detect_prio_handler, &snprint_hw_detect_prio);
@@ -2220,6 +2252,7 @@ init_keywords(vector keywords)
        install_keyword("fast_io_fail_tmo", &ovr_fast_io_fail_handler, &snprint_ovr_fast_io_fail);
        install_keyword("dev_loss_tmo", &ovr_dev_loss_handler, &snprint_ovr_dev_loss);
        install_keyword("eh_deadline", &ovr_eh_deadline_handler, &snprint_ovr_eh_deadline);
+       install_keyword("max_retries", &ovr_max_retries_handler, &snprint_ovr_max_retries);
        install_keyword("user_friendly_names", &ovr_user_friendly_names_handler, &snprint_ovr_user_friendly_names);
        install_keyword("retain_attached_hw_handler", &ovr_retain_hwhandler_handler, &snprint_ovr_retain_hwhandler);
        install_keyword("detect_prio", &ovr_detect_prio_handler, &snprint_ovr_detect_prio);
@@ -2248,6 +2281,7 @@ init_keywords(vector keywords)
        install_keyword("fast_io_fail_tmo", &pc_fast_io_fail_handler, &snprint_pc_fast_io_fail);
        install_keyword("dev_loss_tmo", &pc_dev_loss_handler, &snprint_pc_dev_loss);
        install_keyword("eh_deadline", &pc_eh_deadline_handler, &snprint_pc_eh_deadline);
+       install_keyword("max_retries", &pc_max_retries_handler, &snprint_pc_max_retries);
        install_sublevel_end();
 
        install_keyword_root("multipaths", &multipaths_handler);
diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
index 84ce5fe7..ee261d90 100644
--- a/libmultipath/discovery.c
+++ b/libmultipath/discovery.c
@@ -614,6 +614,43 @@ sysfs_set_eh_deadline(struct path *pp)
        return (ret <= 0);
 }
 
+static int
+sysfs_set_max_retries(struct path *pp)
+{ /* write pp->max_retries to the path's scsi_disk max_retries attribute */
+       struct udev_device *parent;
+       char value[16];
+       STRBUF_ON_STACK(buf);
+       int ret, len;
+
+       if (pp->max_retries == MAX_RETRIES_UNSET)
+               return 0; /* nothing configured, sysfs is left untouched */
+
+       if (!pp->udev || pp->sg_id.host_no < 0)
+               return 1; /* no udev device or negative host number */
+
+       len = sprintf(value, "%d", (pp->max_retries == MAX_RETRIES_OFF)? -1 :
+                                  (pp->max_retries == MAX_RETRIES_ZERO)? 0 :
+                                  pp->max_retries); /* OFF -> -1, ZERO -> 0 */
+
+       parent = udev_device_get_parent_with_subsystem_devtype(pp->udev,
+                       "scsi", "scsi_device");
+       if (!parent)
+               return 1; /* lookup of the scsi_device parent failed */
+
+       if (print_strbuf(&buf, "scsi_disk/%i:%i:%i:%" PRIu64 "/max_retries",
+                        pp->sg_id.host_no, pp->sg_id.channel,
+                        pp->sg_id.scsi_id, pp->sg_id.lun) < 0)
+               return 1; /* could not build the attribute name */
+
+       ret = sysfs_attr_set_value(parent, get_strbuf_str(&buf), value, len);
+       if (len != ret)
+               log_sysfs_attr_set_value(3, ret,
+                                        "%s/%s: failed to set value to %s",
+                                        udev_device_get_sysname(parent),
+                                        get_strbuf_str(&buf), value);
+       return (len != ret); /* 0 only if sysfs_attr_set_value() returned len */
+}
+
 static void
 sysfs_set_rport_tmo(struct multipath *mpp, struct path *pp)
 {
@@ -875,10 +912,12 @@ sysfs_set_scsi_tmo (struct config *conf, struct multipath *mpp)
                select_fast_io_fail(conf, pp);
                select_dev_loss(conf, pp);
                select_eh_deadline(conf, pp);
+               select_max_retries(conf, pp);
 
                if (pp->dev_loss == DEV_LOSS_TMO_UNSET &&
                    pp->fast_io_fail == MP_FAST_IO_FAIL_UNSET &&
-                   pp->eh_deadline == EH_DEADLINE_UNSET)
+                   pp->eh_deadline == EH_DEADLINE_UNSET &&
+                   pp->max_retries == MAX_RETRIES_UNSET)
                        continue;
 
                if (pp->bus != SYSFS_BUS_SCSI) {
@@ -886,6 +925,7 @@ sysfs_set_scsi_tmo (struct config *conf, struct multipath *mpp)
                        continue;
                }
                sysfs_set_eh_deadline(pp);
+               sysfs_set_max_retries(pp);
 
                if (pp->dev_loss == DEV_LOSS_TMO_UNSET &&
                    pp->fast_io_fail == MP_FAST_IO_FAIL_UNSET)
diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c
index 44241e2a..15abb9e5 100644
--- a/libmultipath/propsel.c
+++ b/libmultipath/propsel.c
@@ -960,6 +960,24 @@ out:
        return 0;
 }
 
+int select_max_retries(struct config *conf, struct path *pp)
+{ /* choose pp->max_retries; always returns 0 */
+       const char *origin;
+       STRBUF_ON_STACK(buff);
+
+       pp_set_ovr_pce(max_retries);
+       pp_set_hwe(max_retries);
+       pp_set_conf(max_retries);
+       pp->max_retries = MAX_RETRIES_UNSET;
+       /* not changing sysfs in default case, so don't print anything */
+       return 0;
+out: /* NOTE(review): presumably the pp_set_* macros jump here - confirm */
+       print_undef_off_zero(&buff, pp->max_retries);
+       condlog(3, "%s: max_retries = %s %s", pp->dev,
+               get_strbuf_str(&buff), origin);
+       return 0;
+}
+
 int select_flush_on_last_del(struct config *conf, struct multipath *mp)
 {
        const char *origin;
diff --git a/libmultipath/propsel.h b/libmultipath/propsel.h
index 73615c2f..7203509e 100644
--- a/libmultipath/propsel.h
+++ b/libmultipath/propsel.h
@@ -21,6 +21,7 @@ int select_gid(struct config *conf, struct multipath *mp);
 int select_fast_io_fail(struct config *conf, struct path *pp);
 int select_dev_loss(struct config *conf, struct path *pp);
 int select_eh_deadline(struct config *conf, struct path *pp);
+int select_max_retries(struct config *conf, struct path *pp);
 int select_reservation_key(struct config *conf, struct multipath *mp);
 int select_retain_hwhandler (struct config *conf, struct multipath * mp);
 int select_detect_prio(struct config *conf, struct path * pp);
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index 17e13ee7..c20e99ce 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -295,6 +295,12 @@ enum eh_deadline_states {
        EH_DEADLINE_ZERO = UOZ_ZERO,
 };
 
+enum max_retries_states { /* special values of a path's max_retries */
+       MAX_RETRIES_UNSET = UOZ_UNDEF, /* sysfs_set_max_retries() writes nothing */
+       MAX_RETRIES_OFF = UOZ_OFF, /* written to sysfs as -1 */
+       MAX_RETRIES_ZERO = UOZ_ZERO, /* written to sysfs as 0 */
+};
+
 enum recheck_wwid_states {
        RECHECK_WWID_UNDEF = YNU_UNDEF,
        RECHECK_WWID_OFF = YNU_NO,
@@ -381,6 +387,7 @@ struct path {
        int fast_io_fail;
        unsigned int dev_loss;
        int eh_deadline;
+       int max_retries;
        bool is_checked;
        bool can_use_env_uid;
        unsigned int checker_timeout;
diff --git a/multipath/multipath.conf.5.in b/multipath/multipath.conf.5.in
index 226d0019..41f3927e 100644
--- a/multipath/multipath.conf.5.in
+++ b/multipath/multipath.conf.5.in
@@ -793,6 +793,22 @@ The default is: \fB<unset>\fR
 .
 .
 .TP
+.B max_retries
+Specify the maximum number of times the SCSI layer will retry IO commands
+before returning failure. Setting this can be helpful for cases where the path
+checker succeeds, but IO commands hang and time out. By default, the SCSI layer
+will retry IOs 5 times. Reducing this value will allow multipath to retry the IO
+down another path sooner. \fBNote:\fR If it is necessary to set this value, it
+is also recommended to set up shaky paths detection. See "Shaky paths detection"
+below. Valid values are
+\fB0\fR through \fB5\fR.
+.RS
+.TP
+The default is: \fB<unset>\fR
+.RE
+.
+.
+.TP
 .B bindings_file
 (Deprecated) This option is not supported any more, and will be ignored.
 .RS
@@ -1687,6 +1703,8 @@ section:
 .TP
 .B eh_deadline
 .TP
+.B max_retries
+.TP
 .B flush_on_last_del
 .TP
 .B user_friendly_names
@@ -1773,6 +1791,8 @@ the values are taken from the \fIdevices\fR or \fIdefaults\fR sections:
 .TP
 .B eh_deadline
 .TP
+.B max_retries
+.TP
 .B user_friendly_names
 .TP
 .B retain_attached_hw_handler
@@ -1844,6 +1864,8 @@ from the \fIoverrides\fR, \fIdevices\fR, or \fIdefaults\fR section:
 .B dev_loss_tmo
 .TP
 .B eh_deadline
+.TP
+.B max_retries
 .PD
 .
 .
-- 
2.41.0


Reply via email to