Hello,

Attached is a new patchset.

1) Run checkarray for 1h every day, doing 1/CHECK_SPLIT of the job each time.
2) CHECK_SPLIT is tunable via /etc/default/mdadm or the --split option.
3) Introduced --interrupt and --cancel options (the latter interrupts the
   current array check *and* resets sync_min/sync_max to their defaults).

Remaining issue: multiple arrays sharing common physical disks.  We
can set /sys/block/mdX/md/sync_force_parallel to 1 temporarily while
doing checks.  It's easily doable, but may introduce some problems.
Another solution (looks ugly): polling
/sys/block/mdX/md/sync_completed *and* cancelling the check when
sync_completed equals sync_max.

PS: It looks like there is no chance of getting any solution for this
wishlist item into Wheezy.  But it would be really nice to hear from
the maintainers about the implementations suggested above.
--- /etc/cron.d/mdadm.orig	2012-06-02 12:22:29.498152386 +0400
+++ /etc/cron.d/mdadm	2012-06-02 22:57:20.164896601 +0400
@@ -5,8 +5,7 @@
 # distributed under the terms of the Artistic Licence 2.0
 #
 
-# By default, run at 00:57 on every Sunday, but do nothing unless the day of
-# the month is less than or equal to 7. Thus, only run on the first Sunday of
-# each month. crontab(5) sucks, unfortunately, in this regard; therefore this
-# hack (see #380425).
-57 0 * * 0 root if [ -x /usr/share/mdadm/checkarray ] && [ $(date +\%d) -le 7 ]; then /usr/share/mdadm/checkarray --cron --all --idle --quiet; fi
+# By default, start (or continue unfinished) checks at 00:57 every day
+# and stop (interrupt) them at 01:57.
+57 0 * * * root [ -x /usr/share/mdadm/checkarray ] && /usr/share/mdadm/checkarray --cron --all --idle --quiet
+57 1 * * * root [ -x /usr/share/mdadm/checkarray ] && /usr/share/mdadm/checkarray --cron --all --idle --quiet --interrupt
--- /usr/share/mdadm/checkarray.orig	2012-01-18 23:01:11.000000000 +0400
+++ /usr/share/mdadm/checkarray	2012-06-03 00:21:14.788899501 +0400
@@ -27,10 +27,12 @@
 	-a|--all & check all assembled arrays (check /proc/mdstat).
 	-s|--status & print redundancy check status of devices.
 	-x|--cancel & queue a request to cancel a running redundancy check.
+	--interrupt & queue a request to interrupt a running redundancy check.
 	-i|--idle & perform check in a lowest scheduling class (idle)
 	-l|--slow & perform check in a lower-than-standard scheduling class
 	-f|--fast & perform check in higher-than-standard scheduling class
 	--realtime & perform check in real-time scheduling class (DANGEROUS!)
+	--split n & check next 1/n'th part (n <= 28) of every specified device (override CHECK_SPLIT)
 	-c|--cron & honour AUTOCHECK setting in /etc/default/mdadm.
 	-q|--quiet & suppress informational messages.
 	-Q|--real-quiet & suppress all output messages, including warnings and errors.
@@ -53,8 +55,16 @@
   echo "You can also control the status of a check with /proc/mdstat ."
 }
 
+is_true()
+{
+  case "${1:-}" in
+    [Yy]es|[Yy]|1|[Tt]rue|[Tt]) return 0;;
+    *) return 1;
+  esac
+}
+
 SHORTOPTS=achVqQsxilf
-LONGOPTS=all,cron,help,version,quiet,real-quiet,status,cancel,idle,slow,fast,realtime
+LONGOPTS=all,cron,help,version,quiet,real-quiet,status,cancel,interrupt,idle,slow,fast,realtime,split:
 
 eval set -- $(getopt -o $SHORTOPTS -l $LONGOPTS -n $PROGNAME -- "$@")
 
@@ -66,41 +76,57 @@
 action=check
 ionice=
 
-for opt in $@; do
-  case "$opt" in
-    -a|--all) all=1;;
-    -s|--status) action=status;;
-    -x|--cancel) action=idle;;
-    -i|--idle) ionice=idle;;
-    -l|--slow) ionice=low;;
-    -f|--fast) ionice=high;;
-    --realtime) ionice=realtime;;
-    -c|--cron) cron=1;;
-    -q|--quiet) quiet=1;;
-    -Q|--real-quiet) quiet=2;;
+DEBIANCONFIG=/etc/default/mdadm
+[ -r $DEBIANCONFIG ] && . $DEBIANCONFIG
+if [ $cron = 1 ] && ! is_true ${AUTOCHECK:-false}; then
+  [ $quiet -lt 1 ] && echo "$PROGNAME: I: disabled in $DEBIANCONFIG ." >&2
+  exit 0
+fi
+
+CHECK_SPLIT=${CHECK_SPLIT:-28}
+
+while true
+do
+  case "$1" in
+    -a|--all) all=1; shift;;
+    -s|--status) action=status; shift;;
+    -x|--cancel) action=cancel; shift;;
+    --interrupt) action=interrupt; shift;;
+    -i|--idle) ionice=idle; shift;;
+    -l|--slow) ionice=low; shift;;
+    -f|--fast) ionice=high; shift;;
+    --realtime) ionice=realtime; shift;;
+    -c|--cron) cron=1; shift;;
+    -q|--quiet) quiet=1; shift;;
+    -Q|--real-quiet) quiet=2; shift;;
     -h|--help) usage; exit 0;;
     -V|--version) about; exit 0;;
+    --split) CHECK_SPLIT=$2; shift 2;;
+    --) shift; break;;
+    *) echo "$PROGNAME: E: invalid option: $opt" >&2; usage >&2; exit 0;;
+  esac
+done
+
+for opt in $@
+do
+  case "$opt" in
     /dev/md/*|md/*) arrays="${arrays:+$arrays }md${opt#*md/}";;
     /dev/md*|md*) arrays="${arrays:+$arrays }${opt#/dev/}";;
     /sys/block/md*) arrays="${arrays:+$arrays }${opt#/sys/block/}";;
-    --) :;;
-    *) echo "$PROGNAME: E: invalid option: $opt" >&2; usage >&2; exit 0;;
+    *) echo "$PROGNAME: E: invalid argument: $opt" >&2; usage >&2; exit 0;;
   esac
 done
 
-is_true()
-{
-  case "${1:-}" in
-    [Yy]es|[Yy]|1|[Tt]rue|[Tt]) return 0;;
-    *) return 1;
-  esac
-}
+if [ $CHECK_SPLIT -gt 28 ]
+then
+  CHECK_SPLIT=28
+  echo "$PROGNAME: W: CHECK_SPLIT > 28, reset to 28." >&2
+fi
 
-DEBIANCONFIG=/etc/default/mdadm
-[ -r $DEBIANCONFIG ] && . $DEBIANCONFIG
-if [ $cron = 1 ] && ! is_true ${AUTOCHECK:-false}; then
-  [ $quiet -lt 1 ] && echo "$PROGNAME: I: disabled in $DEBIANCONFIG ." >&2
-  exit 0
+if [ $CHECK_SPLIT -lt 1 ]
+then
+  CHECK_SPLIT=1
+  echo "$PROGNAME: W: CHECK_SPLIT < 1, reset to 1." >&2
 fi
 
 if [ ! -f /proc/mdstat ]; then
@@ -163,10 +189,33 @@
     continue
   fi
 
+  chunk_size=$(cat $MDBASE/chunk_size)
+  # fall back to a safe value if the RAID level has no chunk_size (e.g., RAID 1):
+  [ $chunk_size -lt 1 ] && chunk_size=1
+
+  array_size=$(cat $MDBASE/../size)
+  array_size=$(($array_size >> 1))
+
+  check_size=$(($array_size / $CHECK_SPLIT))
+  # ensure it is a multiple of $chunk_size:
+  check_size=$(($check_size - $check_size % $chunk_size))
+  [ $check_size -eq 0 ] && check_size=$chunk_size
+
   case "$action" in
-    idle)
-      echo $action > $MDBASE/sync_action
+    cancel|interrupt)
+      completed=$(cut -d ' ' -f1 $MDBASE/sync_completed)
+      [ $completed = "none" ] && completed=0
+      echo "idle" > $MDBASE/sync_action
       [ $quiet -lt 1 ] && echo "$PROGNAME: I: cancel request queued for array $array." >&2
+
+      if [ "$action" = "cancel" ]
+      then
+        completed=0
+        echo max > $MDBASE/sync_max
+      fi
+
+      # save sync_min, it must be a multiple of chunk_size:
+      echo $(($completed - $completed%$chunk_size)) > $MDBASE/sync_min
       ;;
 
     check)
@@ -175,6 +224,23 @@
         continue
       fi
 
+      sync_min=$(cat $MDBASE/sync_min)
+      sync_max=$(cat $MDBASE/sync_max)
+
+      [ $sync_max = "max" ] && sync_max=0
+
+      # Workaround for Squeeze kernel, see:
+      # http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=c07b70ad32ed0a5ec9735cafb1aa10b3a2298b7d
+      [ $sync_min -ge $sync_max ] && sync_min=0
+
+      if [ $sync_min -eq $sync_max ]
+      then
+        sync_max=$(($sync_max + $check_size))
+        [ $sync_max -ge $array_size ] && sync_max=$array_size
+
+        echo $sync_max > $MDBASE/sync_max
+      fi
+
       # queue request for the array. The kernel will make sure that these requests
       # are properly queued so as to not kill one of the array.
       echo $action > $MDBASE/sync_action

Reply via email to