raidframe - large disks don't work for rebuilding -- patch (??)

Marshall Midden Thu, 05 Feb 2009 20:55:27 -0800

I have six 500 GB SATA drives in a RAID 5. When I pulled one to switch from a
32-bit machine to a 64-bit one, it went bad -- no problem, right?  (;-)>


Anyway, when I ran "raidctl -R /dev/wd5d raid1", I got a crash in a raidframe
malloc call. Googling turned up a patch from June 2008 (?) in NetBSD.  Of
course, OpenBSD has nicely cleaned up the source [and fixed things!] -- which
makes the diffs interesting.

Anyway, I have it rebuilding now, and hope I didn't blow the patch (attached,
but it needs much more cleanup!).

Index: rf_reconmap.c
===================================================================
RCS file: /cvs/src/sys/dev/raidframe/rf_reconmap.c,v
retrieving revision 1.4
diff -U5 -r1.4 rf_reconmap.c
--- rf_reconmap.c       16 Dec 2002 07:01:05 -0000      1.4
+++ rf_reconmap.c       6 Feb 2009 03:40:55 -0000
@@ -53,17 +53,16 @@
 
 /* Used to mark the end of the list. */
 #define        RU_NIL          ((RF_ReconMapListElem_t *) 0)
 
 
-void rf_compact_stat_entry(RF_Raid_t *, RF_ReconMap_t *, int);
+void rf_compact_stat_entry(RF_Raid_t *, RF_ReconMap_t *, int, int);
 void rf_crunch_list(RF_ReconMap_t *, RF_ReconMapListElem_t *);
 RF_ReconMapListElem_t * rf_MakeReconMapListElem(RF_SectorNum_t, RF_SectorNum_t,
        RF_ReconMapListElem_t *);
 void rf_FreeReconMapListElem(RF_ReconMap_t *, RF_ReconMapListElem_t *);
 void rf_update_size(RF_ReconMap_t *, int);
-void rf_PrintList(RF_ReconMapListElem_t *);
 
 
 /*****************************************************************************
  *
  * Creates and initializes new Reconstruction map.
@@ -95,16 +94,20 @@
        p->sectorsInDisk = disk_sectors;
 
        p->totalRUs = num_rus;
        p->spareRUs = spareUnitsPerDisk;
        p->unitsLeft = num_rus - spareUnitsPerDisk;
+       p->low_ru = 0;
+       p->status_size = RF_RECONMAP_SIZE;
+       p->high_ru = p->status_size - 1;
+       p->head = 0;
 
-       RF_Malloc(p->status, num_rus * sizeof(RF_ReconMapListElem_t *),
+       RF_Malloc(p->status, p->status_size * sizeof(RF_ReconMapListElem_t *),
            (RF_ReconMapListElem_t **));
        RF_ASSERT(p->status != (RF_ReconMapListElem_t **) NULL);
 
-       (void) bzero((char *) p->status, num_rus *
+       (void) bzero((char *) p->status, p->status_size *
            sizeof(RF_ReconMapListElem_t *));
 
        p->size = sizeof(RF_ReconMap_t) + num_rus *
            sizeof(RF_ReconMapListElem_t *);
        p->maxSize = p->size;
@@ -139,27 +142,68 @@
 void
 rf_ReconMapUpdate(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr,
     RF_SectorNum_t startSector, RF_SectorNum_t stopSector)
 {
        RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit;
-       RF_SectorNum_t i, first_in_RU, last_in_RU;
+       RF_SectorNum_t i, first_in_RU, last_in_RU, ru;
        RF_ReconMapListElem_t *p, *pt;
 
        RF_LOCK_MUTEX(mapPtr->mutex);
        RF_ASSERT(startSector >= 0 && stopSector < mapPtr->sectorsInDisk &&
            stopSector >= startSector);
 
        while (startSector <= stopSector) {
                i = startSector / mapPtr->sectorsPerReconUnit;
                first_in_RU = i * sectorsPerReconUnit;
                last_in_RU = first_in_RU + sectorsPerReconUnit - 1;
-               p = mapPtr->status[i];
+//             p = mapPtr->status[i];
+
+                /* do we need to move the queue? */
+                while (i > mapPtr->high_ru) {
+#ifdef DIAGNOSTIC
+                       if (mapPtr->status[mapPtr->head]!=RU_ALL) {
+                               printf("\nraid%d: reconmap incorrect -- working 
on i %llu\n",
+                                      raidPtr->raidid, i);
+                               printf("raid%d: ru %llu not completed!!!\n",
+                                      raidPtr->raidid, mapPtr->head);
+ 
+                               printf("raid%d: low: %llu high: %llu\n",
+                                      raidPtr->raidid, mapPtr->low_ru, 
mapPtr->high_ru);
+
+                               panic("reconmap incorrect");
+                       }
+#endif
+                       mapPtr->low_ru++;
+                       mapPtr->high_ru++;
+                       /* initialize "highest" RU status entry, which
+                          will take over the current head postion */
+                       mapPtr->status[mapPtr->head]=RU_NOTHING;
+
+                       /* move head too */
+                       mapPtr->head++;
+                       if (mapPtr->head >= mapPtr->status_size)
+                      {
+                               mapPtr->head = 0;
+                      }
+               }
+
+               ru = i - mapPtr->low_ru + mapPtr->head;
+               if (ru >= mapPtr->status_size)
+                       ru = ru - mapPtr->status_size;
+
+               if ((ru < 0) || (ru >= mapPtr->status_size)) {
+                       printf("raid%d: ru is bogus %llu %llu %llu %llu %llu\n",
+                              raidPtr->raidid, i, ru, mapPtr->head, 
mapPtr->low_ru, mapPtr->high_ru);
+                       panic("bogus ru in reconmap");
+               }
+
+               p = mapPtr->status[ru];
                if (p != RU_ALL) {
                        if (p == RU_NOTHING || p->startSector > startSector) {
                                /* Insert at front of list. */
 
-                               mapPtr->status[i] =
+                               mapPtr->status[ru] =
                                    rf_MakeReconMapListElem(startSector,
                                     RF_MIN(stopSector, last_in_RU),
                                     (p == RU_NOTHING) ? NULL : p);
                                rf_update_size(mapPtr,
                                    sizeof(RF_ReconMapListElem_t));
@@ -172,11 +216,11 @@
                                pt->next = rf_MakeReconMapListElem(startSector,
                                    RF_MIN(stopSector, last_in_RU), p);
                                rf_update_size(mapPtr,
                                    sizeof(RF_ReconMapListElem_t));
                        }
-                       rf_compact_stat_entry(raidPtr, mapPtr, i);
+                       rf_compact_stat_entry(raidPtr, mapPtr, i, ru);
                }
                startSector = RF_MIN(stopSector, last_in_RU) + 1;
        }
        RF_UNLOCK_MUTEX(mapPtr->mutex);
 }
@@ -197,21 +241,21 @@
  * code, but necessary when called from the user-write code.
  *
  *****************************************************************************/
 
 void
-rf_compact_stat_entry(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr, int i)
+rf_compact_stat_entry(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr, int i, int j)
 {
        RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit;
-       RF_ReconMapListElem_t *p = mapPtr->status[i];
+       RF_ReconMapListElem_t *p = mapPtr->status[j];
 
        rf_crunch_list(mapPtr, p);
 
        if ((p->startSector == i * sectorsPerReconUnit) &&
            (p->stopSector == i * sectorsPerReconUnit +
             sectorsPerReconUnit - 1)) {
-               mapPtr->status[i] = RU_ALL;
+               mapPtr->status[j] = RU_ALL;
                mapPtr->unitsLeft--;
                rf_FreeReconMapListElem(mapPtr, p);
        }
 }
 
@@ -295,11 +339,11 @@
 
        numRUs = mapPtr->sectorsInDisk / mapPtr->sectorsPerReconUnit;
        if (mapPtr->sectorsInDisk % mapPtr->sectorsPerReconUnit)
                numRUs++;
 
-       for (i = 0; i < numRUs; i++) {
+       for (i = 0; i < mapPtr->status_size; i++) {
                p = mapPtr->status[i];
                while (p != RU_NOTHING && p != RU_ALL) {
                        q = p;
                        p = p->next;
                        RF_Free(q, sizeof(*q));
@@ -319,16 +363,30 @@
  *****************************************************************************/
 
 int
 rf_CheckRUReconstructed(RF_ReconMap_t *mapPtr, RF_SectorNum_t startSector)
 {
-       RF_ReconMapListElem_t *l;       /* Used for searching. */
        RF_ReconUnitNum_t i;
+       int rv;
 
        i = startSector / mapPtr->sectorsPerReconUnit;
-       l = mapPtr->status[i];
-       return ((l == RU_ALL) ? 1 : 0);
+//     l = mapPtr->status[i];
+//     return ((l == RU_ALL) ? 1 : 0);
+       if (i < mapPtr->low_ru)
+               rv = 1;
+       else if (i > mapPtr->high_ru)
+               rv = 0;
+       else {
+               i = i - mapPtr->low_ru + mapPtr->head;
+               if (i >= mapPtr->status_size)
+                       i = i - mapPtr->status_size;
+               if (mapPtr->status[i] == RU_ALL)
+                       rv = 1;
+               else
+                       rv = 0;
+       }
+       return rv;
 }
 
 RF_ReconUnitCount_t
 rf_UnitsLeftToReconstruct(RF_ReconMap_t *mapPtr)
 {
@@ -340,47 +398,10 @@
 void
 rf_update_size(RF_ReconMap_t *mapPtr, int size)
 {
        mapPtr->size += size;
        mapPtr->maxSize = RF_MAX(mapPtr->size, mapPtr->maxSize);
-}
-
-void
-rf_PrintList(RF_ReconMapListElem_t *listPtr)
-{
-       while (listPtr) {
-               printf("%d,%d -> ", (int) listPtr->startSector,
-                   (int) listPtr->stopSector);
-               listPtr = listPtr->next;
-       }
-       printf("\n");
-}
-
-void
-rf_PrintReconMap(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr, RF_RowCol_t frow,
-    RF_RowCol_t fcol)
-{
-       RF_ReconUnitCount_t numRUs;
-       RF_ReconMapListElem_t *p;
-       RF_ReconUnitNum_t i;
-
-       numRUs = mapPtr->totalRUs;
-       if (mapPtr->sectorsInDisk % mapPtr->sectorsPerReconUnit)
-               numRUs++;
-
-       for (i = 0; i < numRUs; i++) {
-               p = mapPtr->status[i];
-               if (p == RU_ALL)
-                       /* printf("[%d] ALL.\n", i) */;
-               else
-                       if (p == RU_NOTHING) {
-                               printf("%d: Unreconstructed.\n", i);
-                       } else {
-                               printf("%d: ", i);
-                               rf_PrintList(p);
-                       }
-       }
 }
 
 void
 rf_PrintReconSchedule(RF_ReconMap_t *mapPtr, struct timeval *starttime)
 {
Index: rf_reconmap.h
===================================================================
RCS file: /cvs/src/sys/dev/raidframe/rf_reconmap.h,v
retrieving revision 1.3
diff -U5 -r1.3 rf_reconmap.h
--- rf_reconmap.h       16 Dec 2002 07:01:05 -0000      1.3
+++ rf_reconmap.h       6 Feb 2009 03:40:55 -0000
@@ -38,10 +38,13 @@
 #define        _RF__RF_RECONMAP_H_
 
 #include "rf_types.h"
 #include "rf_threadstuff.h"
 
+/* the number of recon units in the status table. */
+#define RF_RECONMAP_SIZE 32
+
 /*
  * Main reconstruction status descriptor; size and maxsize are used for
  * monitoring only: they have no function for reconstruction.
  */
 struct RF_ReconMap_s {
@@ -55,10 +58,17 @@
        RF_ReconUnitCount_t       totalRUs;     /* Total recon units on disk. */
        RF_ReconUnitCount_t       spareRUs;     /*
                                                 * Total number of spare RUs on
                                                 * failed disk.
                                                 */
+        RF_ReconUnitCount_t      low_ru;       /* lowest reconstruction unit 
number in
+                                                 *the status array */
+        RF_ReconUnitCount_t      high_ru;      /* highest reconstruction unit 
number
+                                                 * in the status array */
+        RF_ReconUnitCount_t      head;         /* the position in the array 
where
+                                                * low_ru is found */
+        RF_ReconUnitCount_t      status_size;  /* number of recon units in 
status */
        RF_StripeCount_t          totalParityStripes;
                                                /*
                                                 * Total number of parity
                                                 * stripes in array.
                                                 */
Index: rf_reconstruct.c
===================================================================
RCS file: /cvs/src/sys/dev/raidframe/rf_reconstruct.c,v
retrieving revision 1.16
diff -U5 -r1.16 rf_reconstruct.c
--- rf_reconstruct.c    5 Jun 2007 00:38:22 -0000       1.16
+++ rf_reconstruct.c    6 Feb 2009 03:40:55 -0000
@@ -162,11 +162,18 @@
                    (void *)((unsigned long)a),                         \
                    (void *)((unsigned long)b),                         \
                    NULL, NULL, NULL, NULL, NULL, NULL);                \
 } while (0)
 
+#define RF_RECON_DONE_READS   1
+#define RF_RECON_READ_ERROR   2
+#define RF_RECON_WRITE_ERROR  3
+#define RF_RECON_READ_STOPPED 4
+#define RF_RECON_WRITE_DONE   5
+
 static RF_FreeList_t *rf_recond_freelist;
+
 #define        RF_MAX_FREE_RECOND      4
 #define        RF_RECOND_INC           1
 
 RF_RaidReconDesc_t *rf_AllocRaidReconDesc(RF_Raid_t *,
        RF_RowCol_t, RF_RowCol_t, RF_RaidDisk_t *, int,
@@ -714,13 +721,17 @@
        RF_RowCol_t srow = reconDesc->srow;
        RF_RowCol_t scol = reconDesc->scol;
        RF_ReconMap_t *mapPtr;
 
        RF_ReconEvent_t *event;
+       RF_StripeCount_t incPSID,lastPSID,num_writes,pending_writes,prev;
+       RF_ReconUnitCount_t RUsPerPU;
        struct timeval etime, elpsd;
        unsigned long xor_s, xor_resid_us;
        int retcode, i, ds;
+       int status, done;
+       int recon_error, write_error;
 
        switch (reconDesc->state) {
        case 0:
                raidPtr->accumXorTimeUs = 0;
 
@@ -760,68 +771,174 @@
 
                RF_UNLOCK_MUTEX(raidPtr->mutex);
 
                RF_GETTIME(raidPtr->reconControl[row]->starttime);
 
-               /*
-                * Now start up the actual reconstruction: issue a read for
-                * each surviving disk.
-                */
-
-               reconDesc->numDisksDone = 0;
-               for (i = 0; i < raidPtr->numCol; i++) {
-                       if (i != col) {
-                               /*
-                                * Find and issue the next I/O on the
-                                * indicated disk.
-                                */
-                               if (rf_IssueNextReadRequest(raidPtr, row, i)) {
-                                       Dprintf2("RECON: done issuing for r%d"
-                                           " c%d.\n", row, i);
-                                       reconDesc->numDisksDone++;
-                               }
-                       }
-               }
-
+//             /*
+//              * Now start up the actual reconstruction: issue a read for
+//              * each surviving disk.
+//              */
+//
+//             reconDesc->numDisksDone = 0;
+//             for (i = 0; i < raidPtr->numCol; i++) {
+//                     if (i != col) {
+//                             /*
+//                              * Find and issue the next I/O on the
+//                              * indicated disk.
+//                              */
+//                             if (rf_IssueNextReadRequest(raidPtr, row, i)) {
+//                                     Dprintf2("RECON: done issuing for r%d"
+//                                         " c%d.\n", row, i);
+//                                     reconDesc->numDisksDone++;
+//                             }
+//                     }
+//             }
+//
                reconDesc->state = 2;
 
        case 2:
                Dprintf("RECON: resume requests.\n");
                rf_ResumeNewRequests(raidPtr);
 
                reconDesc->state = 3;
 
        case 3:
 
-               /*
-                * Process reconstruction events until all disks report that
-                * they've completed all work.
-                */
+//             /*
+//              * Process reconstruction events until all disks report that
+//              * they've completed all work.
+//              */
                mapPtr = raidPtr->reconControl[row]->reconMap;
 
-               while (reconDesc->numDisksDone < raidPtr->numCol - 1) {
+//             while (reconDesc->numDisksDone < raidPtr->numCol - 1) {
+ 
+               incPSID = RF_RECONMAP_SIZE;
+               lastPSID = raidPtr->Layout.numStripe / raidPtr->Layout.SUsPerPU;
+               RUsPerPU = raidPtr->Layout.SUsPerPU / raidPtr->Layout.SUsPerRU;
+               recon_error = 0;
+               write_error = 0;
+               pending_writes = incPSID;
+               raidPtr->reconControl[row]->lastPSID = incPSID;
+               done = 0;
+               while (!done) {
+                       num_writes = 0;
+                       /* issue a read for each surviving disk */
+                       reconDesc->numDisksDone = 0;
+                       for (i = 0; i < raidPtr->numCol; i++) {
+                               if (i != col) {
+                                       /* find and issue the next I/O on the
+                                        * indicated disk */
+                                       if (rf_IssueNextReadRequest(raidPtr, 
row, i)) {
+                                               Dprintf2("RECON: done issuing 
for r%d\n"
+                                                   " c%d.\n", row, i);
+                                               reconDesc->numDisksDone++;
+                                       }
+                               }
+                       }
 
-                       event = rf_GetNextReconEvent(reconDesc, row,
-                          (void (*) (void *)) rf_ContinueReconstructFailedDisk,
-                           reconDesc);
-                       RF_ASSERT(event);
+                       /* process reconstruction events until all disks report 
that
+                        * they've completed all work */
 
-                       if (rf_ProcessReconEvent(raidPtr, row, event))
-                               reconDesc->numDisksDone++;
-                       raidPtr->reconControl[row]->numRUsTotal =
-                               mapPtr->totalRUs;
-                       raidPtr->reconControl[row]->numRUsComplete =
-                               mapPtr->totalRUs -
-                               rf_UnitsLeftToReconstruct(mapPtr);
+                       while (reconDesc->numDisksDone < raidPtr->numCol - 1) {
+        
+                               event = rf_GetNextReconEvent(reconDesc, row,
+                                  (void (*) (void *)) 
rf_ContinueReconstructFailedDisk,
+                                   reconDesc);
+                               status = rf_ProcessReconEvent(raidPtr, row, 
event);
+        
+                               /* the normal case is that a read completes, 
and all is well. */
+                               if (status == RF_RECON_DONE_READS) {
+                                       reconDesc->numDisksDone++;
+                               } else if ((status == RF_RECON_READ_ERROR) ||
+                                          (status == RF_RECON_WRITE_ERROR)) {
+                                       /* an error was encountered while 
reconstructing...
+                                          Pretend we've finished this disk.   
+                                       */
+                                       recon_error = 1;
+                                       raidPtr->reconControl[row]->error = 1;
+        
+                                       /* bump the numDisksDone count for 
reads,
+                                          but not for writes */
+                                       if (status == RF_RECON_READ_ERROR)
+                                               reconDesc->numDisksDone++;
+        
+                                       /* write errors are special -- when we 
are
+                                          done dealing with the reads that are
+                                          finished, we don't want to wait for 
any
+                                          writes */
+                                       if (status == RF_RECON_WRITE_ERROR)
+                                               write_error = 1;
+        
+                               } else if (status == RF_RECON_READ_STOPPED) {
+                                       /* count this component as being "done" 
*/
+                                       reconDesc->numDisksDone++;
+                               } else if (status == RF_RECON_WRITE_DONE) {
+                                       num_writes++;
+                               }
+                        
+                               if (recon_error) {
+                                       /* make sure any stragglers are woken 
up so that
+                                          their theads will complete, and we 
can get out
+                                          of here with all IO processed */
 
-                       raidPtr->reconControl[row]->percentComplete =
-                           (raidPtr->reconControl[row]->numRUsComplete * 100 /
-                            raidPtr->reconControl[row]->numRUsTotal);
-                       if (rf_prReconSched) {
-                               rf_PrintReconSchedule(
-                                   raidPtr->reconControl[row]->reconMap,
-                                   &(raidPtr->reconControl[row]->starttime));
+                                       rf_WakeupHeadSepCBWaiters(raidPtr, row);
+                               }
+
+                               raidPtr->reconControl[row]->numRUsTotal =
+                                       mapPtr->totalRUs;
+                               raidPtr->reconControl[row]->numRUsComplete =
+                                       mapPtr->totalRUs -
+                                       rf_UnitsLeftToReconstruct(mapPtr);
+
+#if RF_DEBUG_RECON
+                               raidPtr->reconControl[row]->percentComplete =
+                                       
(raidPtr->reconControl[row]->numRUsComplete * 100 / 
raidPtr->reconControl[row]->numRUsTot
+       al);
+                               if (rf_prReconSched) {
+                                       
rf_PrintReconSchedule(raidPtr->reconControl[row]->reconMap, 
&(raidPtr->reconControl[row]->starttime));
+                               }
+       #endif
+                       }
+                                  
+                       /* reads done, wakup any waiters, and then wait for 
writes */
+                          
+                       rf_WakeupHeadSepCBWaiters(raidPtr, row);
+                                       
+                       while (!recon_error && (num_writes < pending_writes)) {
+                               event = rf_GetNextReconEvent(reconDesc, row,
+                                  (void (*) (void *)) 
rf_ContinueReconstructFailedDisk,
+                                   reconDesc);
+                               status = rf_ProcessReconEvent(raidPtr, row, 
event);
+        
+                               if (status == RF_RECON_WRITE_ERROR) {
+                                       recon_error = 1;
+                                       raidPtr->reconControl[row]->error = 1;
+                                       /* an error was encountered at the very 
end... bail */
+                               } else if (status == RF_RECON_WRITE_DONE) {
+                                       num_writes++;
+                               }
+                       }
+                       if (recon_error ||
+                           (raidPtr->reconControl[row]->lastPSID == lastPSID)) 
{
+                               done = 1;
+                               break;
+                       }
+        
+                       prev = raidPtr->reconControl[row]->lastPSID;
+                       raidPtr->reconControl[row]->lastPSID += incPSID;
+        
+                       if (raidPtr->reconControl[row]->lastPSID > lastPSID) {  
+                               pending_writes = lastPSID - prev;
+                               raidPtr->reconControl[row]->lastPSID = lastPSID;
+                       }
+        
+                       /* back down curPSID to get ready for the next round... 
*/
+                       for (i = 0; i < raidPtr->numCol; i++) {
+                               if (i != col) {
+                                       
raidPtr->reconControl[row]->perDiskInfo[i].curPSID--;
+                                       
raidPtr->reconControl[row]->perDiskInfo[i].ru_count = RUsPerPU - 1;
+                               }
                        }
                }
 
                reconDesc->state = 4;
 
@@ -833,11 +950,11 @@
                /*
                 * At this point all the reads have completed. We now wait
                 * for any pending writes to complete, and then we're done.
                 */
 
-               while (rf_UnitsLeftToReconstruct(
+               while (!recon_error && rf_UnitsLeftToReconstruct(
                    raidPtr->reconControl[row]->reconMap) > 0) {
 
                        event = rf_GetNextReconEvent(reconDesc, row,
                           (void (*) (void *)) rf_ContinueReconstructFailedDisk,
                            reconDesc);
@@ -992,10 +1109,11 @@
                } else
                        if (rbuf->type == RF_RBUF_TYPE_FORCED)
                                rf_FreeReconBuffer(rbuf);
                        else
                                RF_ASSERT(0);
+               retcode = RF_RECON_WRITE_DONE;
                break;
 
                /* A buffer-stall condition has been cleared. */
        case RF_REVENT_BUFCLEAR:
                Dprintf2("RECON: BUFCLEAR EVENT: row %d col %d.\n", frow,
@@ -1267,10 +1385,38 @@
        pssPtr->issued[col] = 1;
 
 out:
        RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
        return (0);
+}
+
+
+void
+rf_WakeupHeadSepCBWaiters(RF_Raid_t *raidPtr, RF_RowCol_t row)
+{
+       RF_CallbackDesc_t *p;
+                               
+       RF_LOCK_MUTEX(raidPtr->reconControl->rb_mutex);
+//       while(raidPtr->reconControl[row]->rb_lock) {   
+//               ltsleep(&raidPtr->reconControl[row]->rb_lock, PRIBIO,  
+//                       "rf_wakeuphscbw", 0, 
&raidPtr->reconControl[row]->rb_mutex);
+//       }
+                       
+       raidPtr->reconControl[row]->rb_lock = 1;
+       RF_UNLOCK_MUTEX(raidPtr->reconControl[row]->rb_mutex);
+                       
+       while (raidPtr->reconControl[row]->headSepCBList) {
+               p = raidPtr->reconControl[row]->headSepCBList;
+               raidPtr->reconControl[row]->headSepCBList = p->next;
+               p->next = NULL;
+               rf_CauseReconEvent(raidPtr, row, p->col, NULL, 
RF_REVENT_HEADSEPCLEAR);
+               rf_FreeCallbackDesc(p);
+       }
+       RF_LOCK_MUTEX(raidPtr->reconControl[row]->rb_mutex);
+       raidPtr->reconControl[row]->rb_lock = 0;
+       wakeup(&raidPtr->reconControl[row]->rb_lock);
+       RF_UNLOCK_MUTEX(raidPtr->reconControl[row]->rb_mutex);
 }
 
 
 /*
  * Given a parity stripe ID, we want to find out whether both the
Index: rf_reconstruct.h
===================================================================
RCS file: /cvs/src/sys/dev/raidframe/rf_reconstruct.h,v
retrieving revision 1.5
diff -U5 -r1.5 rf_reconstruct.h
--- rf_reconstruct.h    16 Dec 2002 07:01:05 -0000      1.5
+++ rf_reconstruct.h    6 Feb 2009 03:40:55 -0000
@@ -216,10 +216,14 @@
                                                 */
        int                      numRUsTotal;   /*
                                                 * Total number of
                                                 * Reconstruction Units.
                                                 */
+       int                     error;          /* non-0 indicates that an 
error has
+                                                * occurred during the 
reconstruction, and
+                                                * the reconstruction is in the 
process of
+                                                * bailing out. */
 
        /* Reconstruction event queue. */
        RF_ReconEvent_t         *eventQueue;    /*
                                                 * Queue of pending
                                                 * reconstruction events.
@@ -237,10 +241,14 @@
        /* Reconstruction buffer management. */
        RF_DECLARE_MUTEX        (rb_mutex);     /*
                                                 * Mutex for messing around
                                                 * with recon buffers.
                                                 */
+       int rb_lock;                            /* 1 if someone is mucking  
+                                                 * with recon buffers,
+                                                 * 0 otherwise */
+
        RF_ReconBuffer_t        *floatingRbufs; /*
                                                 * Available floating
                                                 * reconstruction buffers.
                                                 */
        RF_ReconBuffer_t        *committedRbufs;/*
@@ -299,9 +307,10 @@
 int  rf_ReconstructInPlace(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t);
 int  rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *);
 int  rf_ForceOrBlockRecon(RF_Raid_t *, RF_AccessStripeMap_t *,
        void (*) (RF_Raid_t *, void *), void *);
 int  rf_UnblockRecon(RF_Raid_t *, RF_AccessStripeMap_t *);
+void rf_WakeupHeadSepCBWaiters(RF_Raid_t *raidPtr, RF_RowCol_t row);
 int  rf_RegisterReconDoneProc(RF_Raid_t *, void (*) (RF_Raid_t *, void *),
        void *, RF_ReconDoneProc_t **);
 
 #endif /* !_RF__RF_RECONSTRUCT_H_ */

[demime 1.01d removed an attachment of type application/octet-stream which had 
a name of A.cvs.diff.raidframe]

raidframe - large disks don't work for rebuilding -- patch (??)

Reply via email to