On 15.07.2015 20:05, Vladimir 'φ-coder/phcoder' Serbinenko wrote:
> On 28.06.2015 20:06, Andrei Borzenkov wrote:
>> I was looking at implementing detection of outdated RAID members.
>> Unfortunately it appears to be fundamentally incompatible with lazy
>> scanning as implemented currently by GRUB. We simply cannot stop
>> scanning for other copies of metadata once "enough" was seen. Because
>> any other disk may contain more actual copy which invalidates
>> everything seen up to this point.
>>
>> So basically either we officially admit that GRUB is not able to detect
>> stale members or we drop lazy scanning.
>>
>> Comments, ideas?
>>
> We don't need to see all disks to decide that there is no staleness. If
> you have an array with N devices and you can lose at most K of them,
> then you can check for staleness after you have seen max(K+1, N-K)
> drives. Why?
> 
> Let those disks have generation numbers g_0,...,g_{N-1}. Our goal is to
> find the largest number G s.t. number of indices with
> g_i >= G is at least N-K.
> In most common case when you have seen K+1 disks all of them will have
> the same generation number
> g_0=g_1=...=g_{K}
> Then we know that
> G<=g_0
> Suppose not then all of 0,...,K are stale and we have lost K+1 drives
> which contradicts our goal.
> On the other hand when we have seen N-K devices we know that
> G>=min(g_0,...,g_{N-K-1})
> as with G=min(g_0,...,g_{N-K-1}) we already have N-K disks.
> 
> In cases other than mirror usually K+1<=N-K and so we don't even need to
> scan for more disks to detect staleness.
> The code will be slightly tricky as it has to handle tolerating
> staleness if there are too little disks but it's totally feasible. Let
> me figure out the rest of math and write a prototype.
Untested patch implementing these ideas, just to illustrate
>> _______________________________________________
>> Grub-devel mailing list
>> Grub-devel@gnu.org
>> https://lists.gnu.org/mailman/listinfo/grub-devel
>>
> 
> 

diff --git a/grub-core/disk/diskfilter.c b/grub-core/disk/diskfilter.c
index c4f6678..850046e 100644
--- a/grub-core/disk/diskfilter.c
+++ b/grub-core/disk/diskfilter.c
@@ -41,77 +41,201 @@ static int lv_num = 0;
 
 static struct grub_diskfilter_lv *
 find_lv (const char *name);
-static int is_lv_readable (struct grub_diskfilter_lv *lv, int easily);
+static void
+recompute_lv_readable (struct grub_diskfilter_lv *lv);
 
 
 
-static grub_err_t
-is_node_readable (const struct grub_diskfilter_node *node, int easily)
+static int
+get_number_needed_nodes(struct grub_diskfilter_segment *seg, int easily)
+{
+  int need = seg->node_count;
+
+  switch (seg->type)
+    {
+    case GRUB_DISKFILTER_RAID6:
+      if (!easily)
+	need--;
+      /* Fallthrough.  */
+    case GRUB_DISKFILTER_RAID4:
+    case GRUB_DISKFILTER_RAID5:
+      if (!easily)
+	need--;
+      /* Fallthrough.  */
+    case GRUB_DISKFILTER_STRIPED:
+      break;
+      
+    case GRUB_DISKFILTER_MIRROR:
+      need = 1;
+      break;
+
+    case GRUB_DISKFILTER_RAID10:
+      {
+	unsigned int n;
+	n = seg->layout & 0xFF;
+	if (n == 1)
+	  n = (seg->layout >> 8) & 0xFF;
+	need = seg->node_count - n + 1;
+      }
+      break;
+    }
+  return need;
+}
+
+/* Return whether node is readable and if so update node->generation.  */
+static void
+recompute_node_readable (struct grub_diskfilter_node *node)
 {
   /* Check whether we actually know the physical volume we want to
      read from.  */
   if (node->pv)
-    return !!(node->pv->disk);
+    {
+      node->readability.is_readable = !!(node->pv->disk);
+      node->readability.is_easily_readable = !!(node->pv->disk);
+      node->readability.generation = node->pv->generation;
+      return;
+    }
   if (node->lv)
-    return is_lv_readable (node->lv, easily);
+    {
+      recompute_lv_readable (node->lv);
+      node->readability = node->lv->readability;
+      return;
+    }
+  node->readability.is_readable = 0;
+  node->readability.is_easily_readable = 0;
+  node->readability.generation = 0;
+  return;
+}
+
+static grub_uint64_t
+compute_generation (struct grub_diskfilter_segment *seg, int need)
+{
+  unsigned i;
+  for (i = 0; i < seg->node_count; i++)
+    if (seg->nodes[i].readability.is_readable) {
+      unsigned j;
+      grub_uint64_t candidate = seg->nodes[i].readability.generation;
+      int higher = 0, strictly_higher = 0;
+      for (j = 0; j < seg->node_count; j++)
+	if (seg->nodes[j].readability.is_readable) {
+	  strictly_higher += (seg->nodes[j].readability.generation > candidate);
+	  higher += (seg->nodes[j].readability.generation >= candidate);
+	}
+      if (higher >= need && strictly_higher < need)
+	return candidate;
+    }
   return 0;
 }
 
 static int
-is_lv_readable (struct grub_diskfilter_lv *lv, int easily)
+still_could_increase (struct grub_diskfilter_segment *seg, int need, grub_uint64_t generation)
 {
-  unsigned i, j;
-  if (!lv)
-    return 0;
-  for (i = 0; i < lv->segment_count; i++)
+  unsigned j;
+  int strictly_higher = 0;
+  for (j = 0; j < seg->node_count; j++)
+    {
+      strictly_higher += (seg->nodes[j].readability.generation > generation) || !(seg->nodes[j].readability.is_readable);
+    }
+  return (strictly_higher >= need);
+}
+
+static void
+recompute_segment_readable (struct grub_diskfilter_segment *seg)
+{
+  int need = get_number_needed_nodes(seg, 0), have = 0;
+  int easily_readable;
+  grub_uint64_t generation;
+  unsigned j;
+  for (j = 0; j < seg->node_count; j++)
+    recompute_node_readable (seg->nodes + j);
+  for (j = 0; j < seg->node_count; j++)
+    {
+      if (seg->nodes[j].readability.is_readable)
+	have++;
+    }
+  /* Not enough disks.  */
+  if (have < need)
+    {
+      seg->readability.is_easily_readable = 0;
+      seg->readability.is_readable = 0;
+      seg->readability.generation = 0;
+      return;
+    }
+
+  generation = compute_generation (seg, need);
+  /* Do not tolerate any staleness on easily readable level.  */
+  if (still_could_increase (seg, need, generation))
+    {
+      easily_readable = 0;
+    }
+  else
     {
-      int need = lv->segments[i].node_count, have = 0;
-      switch (lv->segments[i].type)
+      int have_easily = 0, need_easily = get_number_needed_nodes(seg, 1);
+      for (j = 0; j < seg->node_count; j++)
 	{
-	case GRUB_DISKFILTER_RAID6:
-	  if (!easily)
-	    need--;
-	  /* Fallthrough.  */
-	case GRUB_DISKFILTER_RAID4:
-	case GRUB_DISKFILTER_RAID5:
-	  if (!easily)
-	    need--;
-	  /* Fallthrough.  */
-	case GRUB_DISKFILTER_STRIPED:
-	  break;
-
-	case GRUB_DISKFILTER_MIRROR:
-	  need = 1;
-	  break;
-
-	case GRUB_DISKFILTER_RAID10:
-	  {
-	    unsigned int n;
-	    n = lv->segments[i].layout & 0xFF;
-	    if (n == 1)
-	      n = (lv->segments[i].layout >> 8) & 0xFF;
-	    need = lv->segments[i].node_count - n + 1;
-	  }
-	  break;
+	  if (seg->nodes[j].readability.is_easily_readable && seg->nodes[j].readability.generation >= generation)
+	    have_easily++;
 	}
-	for (j = 0; j < lv->segments[i].node_count; j++)
-	  {
-	    if (is_node_readable (lv->segments[i].nodes + j, easily))
-	      have++;
-	    if (have >= need)
-	      break;
-	  }
-	if (have < need)
-	  return 0;
+      easily_readable = have_easily >= need_easily;
     }
+  seg->readability.generation = generation;
+  seg->readability.is_readable = 1;
+  seg->readability.is_easily_readable = easily_readable;
+}
 
-  return 1;
+static void
+recompute_lv_readable (struct grub_diskfilter_lv *lv)
+{
+  unsigned i;
+  struct grub_diskfilter_readability ret =
+    {
+      .is_readable = 1,
+      .is_easily_readable = 1,
+      .generation = -1
+    };
+  if (!lv)
+    return;
+  for (i = 0; i < lv->segment_count; i++)
+    {
+      recompute_segment_readable (&lv->segments[i]);
+      if (!lv->segments[i].readability.is_easily_readable)
+	ret.is_easily_readable = 0;
+      if (!lv->segments[i].readability.is_readable)
+	ret.is_readable = 0;
+      if (ret.generation > lv->segments[i].readability.generation)
+	ret.generation = lv->segments[i].readability.generation;
+    }
+  lv->readability = ret;
+}
+
+static int
+is_lv_readable (struct grub_diskfilter_lv *lv)
+{
+  if (!lv)
+    return 0;
+
+  if (lv->readability.is_readable)
+    return 1;
+  recompute_lv_readable(lv);
+  return lv->readability.is_readable;
+}
+
+static int
+is_lv_easily_readable (struct grub_diskfilter_lv *lv)
+{
+  if (!lv)
+    return 0;
+  if (lv->readability.is_easily_readable)
+    return 1;
+  recompute_lv_readable(lv);
+  return lv->readability.is_easily_readable;
 }
 
 static grub_err_t
 insert_array (grub_disk_t disk, const struct grub_diskfilter_pv_id *id,
 	      struct grub_diskfilter_vg *array,
               grub_disk_addr_t start_sector,
+	      grub_uint64_t generation,
 	      grub_diskfilter_t diskfilter __attribute__ ((unused)));
 
 static int
@@ -154,15 +278,16 @@ scan_disk_partition_iter (grub_disk_t disk, grub_partition_t p, void *data)
 
   for (diskfilter = grub_diskfilter_list; diskfilter; diskfilter = diskfilter->next)
     {
+      grub_uint64_t generation = 0;
 #ifdef GRUB_UTIL
       grub_util_info ("Scanning for %s devices on disk %s", 
 		      diskfilter->name, name);
 #endif
       id.uuid = 0;
       id.uuidlen = 0;
-      arr = diskfilter->detect (disk, &id, &start_sector);
+      arr = diskfilter->detect (disk, &id, &start_sector, &generation);
       if (arr &&
-	  (! insert_array (disk, &id, arr, start_sector, diskfilter)))
+	  (! insert_array (disk, &id, arr, start_sector, generation, diskfilter)))
 	{
 	  if (id.uuidlen)
 	    grub_free (id.uuid);
@@ -232,7 +357,7 @@ scan_devices (const char *arname)
 	{
 	  if ((p->iterate) (scan_disk_hook, NULL, pull))
 	    return;
-	  if (arname && is_lv_readable (find_lv (arname), 1))
+	  if (arname && is_lv_easily_readable (find_lv (arname)))
 	    return;
 	}
 
@@ -412,7 +537,7 @@ find_lv (const char *name)
 	for (lv = vg->lvs; lv; lv = lv->next)
 	  if (((lv->fullname && grub_strcmp (lv->fullname, name) == 0)
 	       || (lv->idname && grub_strcmp (lv->idname, name) == 0))
-	      && is_lv_readable (lv, 0))
+	      && is_lv_readable (lv))
 	    return lv;
     }
   return NULL;
@@ -429,7 +554,7 @@ grub_diskfilter_open (const char *name, grub_disk_t disk)
 
   lv = find_lv (name);
 
-  if (! lv)
+  if (! lv || !lv->readability.is_easily_readable)
     {
       scan_devices (name);
       if (grub_errno)
@@ -644,16 +769,19 @@ read_segment (struct grub_diskfilter_segment *seg, grub_disk_addr_t sector,
 			|| grub_errno == GRUB_ERR_UNKNOWN_DEVICE)
 		      grub_errno = GRUB_ERR_NONE;
 
-		    err = grub_diskfilter_read_node (&seg->nodes[k],
-						     read_sector
-						     + j * far_ofs + b,
-						     read_size,
-						     buf);
-		    if (! err)
-		      break;
-		    else if (err != GRUB_ERR_READ_ERROR
-			     && err != GRUB_ERR_UNKNOWN_DEVICE)
-		      return err;
+		    if (seg->nodes[k].readability.generation >= seg->readability.generation)
+		      {
+			err = grub_diskfilter_read_node (&seg->nodes[k],
+							 read_sector
+							 + j * far_ofs + b,
+							 read_size,
+							 buf);
+			if (! err)
+			  break;
+			else if (err != GRUB_ERR_READ_ERROR
+				 && err != GRUB_ERR_UNKNOWN_DEVICE)
+			  return err;
+		      }
 		    k++;
 		    if (k == seg->node_count)
 		      k = 0;
@@ -749,10 +877,13 @@ read_segment (struct grub_diskfilter_segment *seg, grub_disk_addr_t sector,
 		|| grub_errno == GRUB_ERR_UNKNOWN_DEVICE)
 	      grub_errno = GRUB_ERR_NONE;
 
-	    err = grub_diskfilter_read_node (&seg->nodes[disknr],
-					     read_sector + b,
-					     read_size,
-					     buf);
+	    if (seg->nodes[disknr].readability.generation >= seg->readability.generation)
+	      err = grub_diskfilter_read_node (&seg->nodes[disknr],
+					       read_sector + b,
+					       read_size,
+					       buf);
+	    else
+	      err = GRUB_ERR_READ_ERROR;
 
 	    if ((err) && (err != GRUB_ERR_READ_ERROR
 			  && err != GRUB_ERR_UNKNOWN_DEVICE))
@@ -1183,6 +1314,7 @@ static grub_err_t
 insert_array (grub_disk_t disk, const struct grub_diskfilter_pv_id *id,
 	      struct grub_diskfilter_vg *array,
               grub_disk_addr_t start_sector,
+	      grub_uint64_t generation,
 	      grub_diskfilter_t diskfilter __attribute__ ((unused)))
 {
   struct grub_diskfilter_pv *pv;
@@ -1219,6 +1351,7 @@ insert_array (grub_disk_t disk, const struct grub_diskfilter_pv_id *id,
 	pv->start_sector -= pv->part_start;
 	pv->part_start = grub_partition_get_start (disk->partition);
 	pv->part_size = grub_disk_get_size (disk);
+	pv->generation = generation;
 
 #ifdef GRUB_UTIL
 	{
@@ -1238,7 +1371,7 @@ insert_array (grub_disk_t disk, const struct grub_diskfilter_pv_id *id,
 	pv->start_sector += pv->part_start;
 	/* Add the device to the array. */
 	for (lv = array->lvs; lv; lv = lv->next)
-	  if (!lv->became_readable_at && lv->fullname && is_lv_readable (lv, 0))
+	  if (!lv->became_readable_at && lv->fullname && is_lv_readable (lv))
 	    lv->became_readable_at = ++inscnt;
 	break;
       }
diff --git a/grub-core/disk/dmraid_nvidia.c b/grub-core/disk/dmraid_nvidia.c
index 881508c..73895ad 100644
--- a/grub-core/disk/dmraid_nvidia.c
+++ b/grub-core/disk/dmraid_nvidia.c
@@ -92,8 +92,9 @@ struct grub_nv_super
 
 static struct grub_diskfilter_vg *
 grub_dmraid_nv_detect (grub_disk_t disk,
-			struct grub_diskfilter_pv_id *id,
-                        grub_disk_addr_t *start_sector)
+		       struct grub_diskfilter_pv_id *id,
+		       grub_disk_addr_t *start_sector,
+		       grub_uint64_t *generation)
 {
   grub_disk_addr_t sector;
   struct grub_nv_super sb;
@@ -103,6 +104,8 @@ grub_dmraid_nv_detect (grub_disk_t disk,
   grub_uint8_t total_volumes;
   char *uuid;
 
+  *generation = 0;
+
   if (disk->partition)
     /* Skip partition.  */
     return NULL;
diff --git a/grub-core/disk/ldm.c b/grub-core/disk/ldm.c
index 8075f2a..f8f3be0 100644
--- a/grub-core/disk/ldm.c
+++ b/grub-core/disk/ldm.c
@@ -780,12 +780,15 @@ make_vg (grub_disk_t disk,
 static struct grub_diskfilter_vg * 
 grub_ldm_detect (grub_disk_t disk,
 		 struct grub_diskfilter_pv_id *id,
-		 grub_disk_addr_t *start_sector)
+		 grub_disk_addr_t *start_sector,
+		 grub_uint64_t *generation)
 {
   grub_err_t err;
   struct grub_ldm_label label;
   struct grub_diskfilter_vg *vg;
 
+  *generation = 0;
+
 #ifdef GRUB_UTIL
   grub_util_info ("scanning %s for LDM", disk->name);
 #endif
diff --git a/grub-core/disk/lvm.c b/grub-core/disk/lvm.c
index 7b265c7..58b0704 100644
--- a/grub-core/disk/lvm.c
+++ b/grub-core/disk/lvm.c
@@ -98,7 +98,8 @@ grub_lvm_check_flag (char *p, const char *str, const char *flag)
 static struct grub_diskfilter_vg * 
 grub_lvm_detect (grub_disk_t disk,
 		 struct grub_diskfilter_pv_id *id,
-		 grub_disk_addr_t *start_sector)
+		 grub_disk_addr_t *start_sector,
+		 grub_uint64_t *generation)
 {
   grub_err_t err;
   grub_uint64_t mda_offset, mda_size;
@@ -116,6 +117,8 @@ grub_lvm_detect (grub_disk_t disk,
   struct grub_diskfilter_vg *vg;
   struct grub_diskfilter_pv *pv;
 
+  *generation = 0;
+    
   /* Search for label. */
   for (i = 0; i < GRUB_LVM_LABEL_SCAN_SECTORS; i++)
     {
diff --git a/grub-core/disk/mdraid1x_linux.c b/grub-core/disk/mdraid1x_linux.c
index 7cc80d3..40a5ab7 100644
--- a/grub-core/disk/mdraid1x_linux.c
+++ b/grub-core/disk/mdraid1x_linux.c
@@ -106,7 +106,8 @@ struct grub_raid_super_1x
 static struct grub_diskfilter_vg *
 grub_mdraid_detect (grub_disk_t disk,
 		    struct grub_diskfilter_pv_id *id,
-		    grub_disk_addr_t *start_sector)
+		    grub_disk_addr_t *start_sector,
+		    grub_uint64_t *generation)
 {
   grub_uint64_t size;
   grub_uint8_t minor_version;
@@ -198,6 +199,7 @@ grub_mdraid_detect (grub_disk_t disk,
       grub_memcpy (uuid, sb.set_uuid, 16);
 
       *start_sector = grub_le_to_cpu64 (sb.data_offset);
+      *generation = grub_le_to_cpu64 (sb.events);
 
       array = grub_diskfilter_make_raid (16, uuid,
 					 grub_le_to_cpu32 (sb.raid_disks),
diff --git a/grub-core/disk/mdraid_linux.c b/grub-core/disk/mdraid_linux.c
index 11024ae..7b9768c 100644
--- a/grub-core/disk/mdraid_linux.c
+++ b/grub-core/disk/mdraid_linux.c
@@ -180,7 +180,8 @@ struct grub_raid_super_09
 static struct grub_diskfilter_vg *
 grub_mdraid_detect (grub_disk_t disk,
 		    struct grub_diskfilter_pv_id *id,
-		    grub_disk_addr_t *start_sector)
+		    grub_disk_addr_t *start_sector,
+		    grub_uint64_t *generation)
 {
   grub_disk_addr_t sector;
   grub_uint64_t size;
@@ -247,6 +248,7 @@ grub_mdraid_detect (grub_disk_t disk,
   uuid[3] = grub_swap_bytes32 (sb->set_uuid3);
 
   *start_sector = 0;
+  *generation = grub_md_to_cpu64 (sb->cp_events);
 
   id->uuidlen = 0;
   id->id = grub_md_to_cpu32 (sb->this_disk.number);
diff --git a/include/grub/diskfilter.h b/include/grub/diskfilter.h
index 1aedcd3..947500b 100644
--- a/include/grub/diskfilter.h
+++ b/include/grub/diskfilter.h
@@ -69,6 +69,7 @@ struct grub_diskfilter_pv {
   grub_disk_addr_t part_start;
   grub_disk_addr_t part_size;
   grub_disk_addr_t start_sector; /* Sector number where the data area starts. */
+  grub_uint64_t generation;
   struct grub_diskfilter_pv *next;
   /* Optional.  */
   grub_uint8_t *internal_id;
@@ -77,6 +78,15 @@ struct grub_diskfilter_pv {
 #endif
 };
 
+struct grub_diskfilter_readability
+{
+  /* Readable without reading any stale disks or doing any recovery.  */
+  int is_easily_readable;
+  /* Readable at all: even reding stale disks and doing recovery.  */
+  int is_readable;
+  grub_uint64_t generation;
+};
+
 struct grub_diskfilter_lv {
   /* Name used for disk.  */
   char *fullname;
@@ -90,6 +100,7 @@ struct grub_diskfilter_lv {
   int became_readable_at;
   int scanned;
   int visible;
+  struct grub_diskfilter_readability readability;
 
   /* Pointer to segment_count segments. */
   struct grub_diskfilter_segment *segments;
@@ -119,6 +130,7 @@ struct grub_diskfilter_segment {
   unsigned int node_count;
   unsigned int node_alloc;
   struct grub_diskfilter_node *nodes;
+  struct grub_diskfilter_readability readability;
 
   unsigned int stripe_size;
 };
@@ -127,6 +139,7 @@ struct grub_diskfilter_node {
   grub_disk_addr_t start;
   /* Optional.  */
   char *name;
+  struct grub_diskfilter_readability readability;
   struct grub_diskfilter_pv *pv;
   struct grub_diskfilter_lv *lv;
 };
@@ -143,7 +156,8 @@ struct grub_diskfilter
 
   struct grub_diskfilter_vg * (*detect) (grub_disk_t disk,
 					 struct grub_diskfilter_pv_id *id,
-					 grub_disk_addr_t *start_sector);
+					 grub_disk_addr_t *start_sector,
+					 grub_uint64_t *generation);
 };
 typedef struct grub_diskfilter *grub_diskfilter_t;
 

Attachment: signature.asc
Description: OpenPGP digital signature

_______________________________________________
Grub-devel mailing list
Grub-devel@gnu.org
https://lists.gnu.org/mailman/listinfo/grub-devel

Reply via email to