Add a round-robin read policy that routes each read IO to the next device
in round-robin order. Chunk allocation, and thus the stripe index, follows
the order of free space available on the devices. So, to make round-robin
effective, the policy follows the devid order instead of the stripe-index
order.

Signed-off-by: Anand Jain <[email protected]>
---
RFC because: this policy performs terribly in the fio tests.
I am not yet sure whether any IO workload or block-layer tuning
would make this policy perform better. As of now this is just an
experimental patch.

 fs/btrfs/sysfs.c   |  2 +-
 fs/btrfs/volumes.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/volumes.h |  3 +++
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 899b66c83db1..d40b0ff054ca 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -917,7 +917,7 @@ static bool strmatch(const char *buffer, const char *string)
 
 /* Must follow the order as in enum btrfs_read_policy */
 static const char * const btrfs_read_policy_name[] = { "pid", "latency",
-                                                      "device" };
+                                                      "device", "roundrobin" };
 
 static ssize_t btrfs_read_policy_show(struct kobject *kobj,
                                      struct kobj_attribute *a, char *buf)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 50d4d54f7abd..60370b9121e0 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5491,6 +5491,52 @@ int btrfs_is_parity_mirror(struct btrfs_fs_info 
*fs_info, u64 logical, u64 len)
        return ret;
 }
 
+struct stripe_mirror {
+       u64 devid;      /* devid of the device backing the stripe; sort key */
+       int map;        /* index of that stripe in map->stripes[] */
+};
+
+/* sort() comparator: order struct stripe_mirror entries by ascending devid. */
+static int btrfs_cmp_devid(const void *a, const void *b)
+{
+       const struct stripe_mirror *s1 = a;
+       const struct stripe_mirror *s2 = b;
+
+       /* Compare explicitly; a u64 difference does not fit the int result. */
+       if (s1->devid < s2->devid)
+               return -1;
+       return s1->devid > s2->devid;
+}
+
+/*
+ * Round-robin over the mirrors in devid order, using fs_devices->total_reads.
+ */
+static int btrfs_find_read_round_robin(struct map_lookup *map, int first,
+                                      int num_stripe)
+{
+       struct stripe_mirror stripes[4] = {0}; /* 4: testing-only limit */
+       struct btrfs_fs_devices *fs_devices;
+       unsigned int cnt;
+       int i;
+
+       /* Out of range for stripes[] or the modulo: use the first mirror. */
+       if (num_stripe <= 0 || num_stripe > (int)ARRAY_SIZE(stripes))
+               return first;
+
+       for (i = 0; i < num_stripe; i++) {
+               stripes[i].devid = map->stripes[first + i].dev->devid;
+               stripes[i].map = first + i;
+       }
+
+       sort(stripes, num_stripe, sizeof(struct stripe_mirror),
+            btrfs_cmp_devid, NULL);
+
+       fs_devices = map->stripes[first].dev->fs_devices;
+       /* Unsigned: a wrapped counter must not yield a negative index. */
+       cnt = atomic_inc_return(&fs_devices->total_reads);
+       return stripes[cnt % num_stripe].map;
+}
+
 static int btrfs_find_best_stripe(struct btrfs_fs_info *fs_info,
                                  struct map_lookup *map, int first,
                                  int num_stripe)
@@ -5579,6 +5625,10 @@ static int find_live_mirror(struct btrfs_fs_info 
*fs_info,
        case BTRFS_READ_POLICY_DEVICE:
                preferred_mirror = btrfs_find_read_preferred(map, first, 
num_stripes);
                break;
+       case BTRFS_READ_POLICY_ROUND_ROBIN:
+               preferred_mirror = btrfs_find_read_round_robin(map, first,
+                                                              num_stripes);
+               break;
        }
 
        if (dev_replace_is_ongoing &&
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 8d5a2cddc0ab..ce4490437f53 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -227,6 +227,8 @@ enum btrfs_read_policy {
        BTRFS_READ_POLICY_LATENCY,
        /* Use the device marked with READ_PREFERRED state */
        BTRFS_READ_POLICY_DEVICE,
+       /* Distribute read IO equally across striped devices */
+       BTRFS_READ_POLICY_ROUND_ROBIN,
        BTRFS_NR_READ_POLICY,
 };
 
@@ -286,6 +288,7 @@ struct btrfs_fs_devices {
 
        /* Policy used to read the mirrored stripes */
        enum btrfs_read_policy read_policy;
+       atomic_t total_reads;
 };
 
 #define BTRFS_BIO_INLINE_CSUM_SIZE     64
-- 
2.30.0

Reply via email to