Currently raid456 queues all work to a single raid5d thread per array.
Since there are no dependencies between operations on different stripes,
I believed a speedup could be obtained by spreading the handle_stripe
load across all available CPUs.  However, I am not seeing a speedup as
measured by tiobench.  I think the reason is that multi-processor
effects only show up when the data is already in the cache, and in that
case the work is already spread out across the client threads.  Also,
work submitted to a workqueue is sticky to the CPU where queue_work()
was called, rather than being load balanced amongst the available CPUs.
I'm posting the patch anyway in case I am overlooking a case where it
would be helpful, and from a cosmetic standpoint it separates raid5d
housekeeping work from handle_stripe work.
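
For reference, the queuing pattern the patch applies to struct
stripe_head looks roughly like the sketch below (illustrative only, not
part of the patch; my_stripe, my_handler and my_wq are made-up names):

/*
 * Illustrative sketch only -- not from the patch.  It shows the
 * embedded work_struct + container_of() pattern that the patch adds
 * to struct stripe_head.
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/workqueue.h>

struct my_stripe {
	sector_t sector;
	struct work_struct work;	/* embedded, like stripe_head->work */
};

static struct workqueue_struct *my_wq;

static void my_handler(struct work_struct *work)
{
	/* recover the containing object from the work pointer */
	struct my_stripe *sh = container_of(work, struct my_stripe, work);

	printk(KERN_DEBUG "handling stripe %llu\n",
		(unsigned long long)sh->sector);
	/* per-stripe processing would run here, off the raid5d thread */
}

static int my_queue_one(sector_t sector)
{
	struct my_stripe *sh;

	if (!my_wq) {
		my_wq = create_workqueue("my_wq");
		if (!my_wq)
			return -ENOMEM;
	}

	/* freeing the stripe is omitted for brevity */
	sh = kzalloc(sizeof(*sh), GFP_KERNEL);
	if (!sh)
		return -ENOMEM;

	sh->sector = sector;
	INIT_WORK(&sh->work, my_handler);
	/* runs my_handler on a workqueue thread, sticky to this CPU */
	queue_work(my_wq, &sh->work);
	return 0;
}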

Signed-off-by: Dan Williams <[EMAIL PROTECTED]>
---

 drivers/md/raid5.c         |  108 ++++++++++++++++++++++++++------------------
 include/linux/raid/raid5.h |    6 ++
 2 files changed, 68 insertions(+), 46 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 11c3d7b..e54310c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -121,7 +121,10 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
                                blk_plug_device(conf->mddev->queue);
                        } else {
                                clear_bit(STRIPE_BIT_DELAY, &sh->state);
-                               list_add_tail(&sh->lru, &conf->handle_list);
+                               conf->workqueue_stripes++;
+                               atomic_inc(&sh->count);
+                               BUG_ON(queue_work(conf->workqueue,
+                                       &sh->work) == 0);
                        }
                        md_wakeup_thread(conf->mddev->thread);
                } else {
@@ -310,6 +313,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
                                if (!test_bit(STRIPE_HANDLE, &sh->state))
                                        atomic_inc(&conf->active_stripes);
                                if (list_empty(&sh->lru) &&
+                                   !work_pending(&sh->work) &&
                                    !test_bit(STRIPE_EXPANDING, &sh->state))
                                        BUG();
                                list_del_init(&sh->lru);
@@ -324,6 +328,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
        return sh;
 }
 
+static void raid456_workqueue(struct work_struct *work);
 static int grow_one_stripe(raid5_conf_t *conf)
 {
        struct stripe_head *sh;
@@ -343,6 +348,7 @@ static int grow_one_stripe(raid5_conf_t *conf)
        /* we just created an active stripe so... */
        atomic_set(&sh->count, 1);
        atomic_inc(&conf->active_stripes);
+       INIT_WORK(&sh->work, raid456_workqueue);
        INIT_LIST_HEAD(&sh->lru);
        release_stripe(sh);
        return 1;
@@ -2448,7 +2454,9 @@ static void raid5_activate_delayed(raid5_conf_t *conf)
                        clear_bit(STRIPE_DELAYED, &sh->state);
                        if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
                                atomic_inc(&conf->preread_active_stripes);
-                       list_add_tail(&sh->lru, &conf->handle_list);
+                       conf->workqueue_stripes++;
+                       atomic_inc(&sh->count);
+                       BUG_ON(queue_work(conf->workqueue, &sh->work) == 0);
                }
        }
 }
@@ -3181,7 +3189,6 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
 }
 
 
-
 /*
  * This is our raid5 kernel thread.
  *
@@ -3191,9 +3198,9 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
  */
 static void raid5d (mddev_t *mddev)
 {
-       struct stripe_head *sh;
        raid5_conf_t *conf = mddev_to_conf(mddev);
        int handled;
+       struct bio *bio;
 
        PRINTK("+++ raid5d active\n");
 
@@ -3201,51 +3208,30 @@ static void raid5d (mddev_t *mddev)
 
        handled = 0;
        spin_lock_irq(&conf->device_lock);
-       while (1) {
-               struct list_head *first;
-               struct bio *bio;
-
-               if (conf->seq_flush != conf->seq_write) {
-                       int seq = conf->seq_flush;
-                       spin_unlock_irq(&conf->device_lock);
-                       bitmap_unplug(mddev->bitmap);
-                       spin_lock_irq(&conf->device_lock);
-                       conf->seq_write = seq;
-                       activate_bit_delay(conf);
-               }
-
-               if (list_empty(&conf->handle_list) &&
-                   atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD &&
-                   !blk_queue_plugged(mddev->queue) &&
-                   !list_empty(&conf->delayed_list))
-                       raid5_activate_delayed(conf);
-
-               while ((bio = remove_bio_from_retry(conf))) {
-                       int ok;
-                       spin_unlock_irq(&conf->device_lock);
-                       ok = retry_aligned_read(conf, bio);
-                       spin_lock_irq(&conf->device_lock);
-                       if (!ok)
-                               break;
-                       handled++;
-               }
 
-               if (list_empty(&conf->handle_list))
-                       break;
+       if (conf->seq_flush != conf->seq_write) {
+               int seq = conf->seq_flush;
+               spin_unlock_irq(&conf->device_lock);
+               bitmap_unplug(mddev->bitmap);
+               spin_lock_irq(&conf->device_lock);
+               conf->seq_write = seq;
+               activate_bit_delay(conf);
+       }
 
-               first = conf->handle_list.next;
-               sh = list_entry(first, struct stripe_head, lru);
+       if (conf->workqueue_stripes == 0 &&
+           atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD &&
+           !blk_queue_plugged(conf->mddev->queue) &&
+           !list_empty(&conf->delayed_list))
+               raid5_activate_delayed(conf);
 
-               list_del_init(first);
-               atomic_inc(&sh->count);
-               BUG_ON(atomic_read(&sh->count)!= 1);
+       while ((bio = remove_bio_from_retry(conf))) {
+               int ok;
                spin_unlock_irq(&conf->device_lock);
-               
-               handled++;
-               handle_stripe(sh, conf->spare_page);
-               release_stripe(sh);
-
+               ok = retry_aligned_read(conf, bio);
                spin_lock_irq(&conf->device_lock);
+               if (!ok)
+                       break;
+               handled++;
        }
        PRINTK("%d stripes handled\n", handled);
 
@@ -3256,6 +3242,29 @@ static void raid5d (mddev_t *mddev)
        PRINTK("--- raid5d inactive\n");
 }
 
+static void raid456_workqueue(struct work_struct *work)
+{
+       struct stripe_head *sh = container_of(work, struct stripe_head, work);
+       raid5_conf_t *conf = sh->raid_conf;
+       unsigned long flags;
+       int workqueue_stripes;
+
+       PRINTK("%s called for stripe %llu\n", __FUNCTION__,
+               (unsigned long long)sh->sector);
+
+       handle_stripe(sh, conf->spare_page);
+
+       spin_lock_irqsave(&conf->device_lock, flags);
+       __release_stripe(conf, sh);
+       workqueue_stripes = --conf->workqueue_stripes;
+       spin_unlock_irqrestore(&conf->device_lock, flags);
+
+       if (workqueue_stripes == 0)
+               raid5d(conf->mddev);
+       else if (unlikely(workqueue_stripes < 0))
+               BUG();
+}
+
 static ssize_t
 raid5_show_stripe_cache_size(mddev_t *mddev, char *page)
 {
@@ -3409,16 +3418,22 @@ static int run(mddev_t *mddev)
                if (!conf->spare_page)
                        goto abort;
        }
+
+       sprintf(conf->workqueue_name, "%s_work", mddev->gendisk->disk_name);
+       
+       if ((conf->workqueue = create_workqueue(conf->workqueue_name)) == NULL)
+               goto abort;
+
        spin_lock_init(&conf->device_lock);
        init_waitqueue_head(&conf->wait_for_stripe);
        init_waitqueue_head(&conf->wait_for_overlap);
-       INIT_LIST_HEAD(&conf->handle_list);
        INIT_LIST_HEAD(&conf->delayed_list);
        INIT_LIST_HEAD(&conf->bitmap_list);
        INIT_LIST_HEAD(&conf->inactive_list);
        atomic_set(&conf->active_stripes, 0);
        atomic_set(&conf->preread_active_stripes, 0);
        atomic_set(&conf->active_aligned_reads, 0);
+       conf->workqueue_stripes = 0;
 
        PRINTK("raid5: run(%s) called.\n", mdname(mddev));
 
@@ -3574,6 +3589,8 @@ abort:
                safe_put_page(conf->spare_page);
                kfree(conf->disks);
                kfree(conf->stripe_hashtbl);
+               if (conf->workqueue)
+                       destroy_workqueue(conf->workqueue);
                kfree(conf);
        }
        mddev->private = NULL;
@@ -3593,6 +3610,7 @@ static int stop(mddev_t *mddev)
        kfree(conf->stripe_hashtbl);
        blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
        sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
+       destroy_workqueue(conf->workqueue);
        kfree(conf->disks);
        kfree(conf);
        mddev->private = NULL;
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index d8286db..3dc410e 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -136,6 +136,7 @@ struct stripe_head {
        spinlock_t              lock;
        int                     bm_seq; /* sequence number for bitmap flushes */
        int                     disks;                  /* disks in stripe */
+       struct work_struct      work;                   /* handle_stripe workqueue */
        struct r5dev {
                struct bio      req;
                struct bio_vec  vec;
@@ -224,15 +225,18 @@ struct raid5_private_data {
                                            */
        int                     previous_raid_disks;
 
-       struct list_head        handle_list; /* stripes needing handling */
        struct list_head        delayed_list; /* stripes that have plugged requests */
        struct list_head        bitmap_list; /* stripes delaying awaiting bitmap update */
        struct bio              *retry_read_aligned; /* currently retrying aligned bios   */
        struct bio              *retry_read_aligned_list; /* aligned bios retry list  */
+       int                     workqueue_stripes; /* stripes currently being handled */
        atomic_t                preread_active_stripes; /* stripes with scheduled io */
        atomic_t                active_aligned_reads;
 
        atomic_t                reshape_stripes; /* stripes with pending writes for reshape */
+
+       struct workqueue_struct *workqueue;
+       char                    workqueue_name[20];     
        /* unfortunately we need two cache names as we temporarily have
         * two caches.
         */