Re: [PATCH 3/3] readahead: scale max readahead size depending on memory size

Rusty Russell Mon, 23 Jul 2007 15:46:16 -0700

On Mon, 2007-07-23 at 12:04 +0200, Jörn Engel wrote:
> I believe this whole thing is fundamentally flawed.  The perfect
> readahead size depends on the device in question.  If we set a single
> system-wide value depending on memory size, it may easily be too small
> and too large at the same time.  Think hard disk and SSD.


Well, I think the filesystem should get the first shot at altering the
readahead heuristic (we should add a hook so it can then ask the
underlying device).

Something like:
===
Hook for filesystems to tweak readahead logic

It was suggested at Fengguang's OLS talk that filesystems can make a
useful contribution to the readahead logic: avoiding seeks and lock
boundaries (on cluster filesystems) and other such filesystem-specific
things.

The alter_readahead hook should return the number of pages to read.

Signed-off-by: Rusty Russell <[EMAIL PROTECTED]>
Signed-off-by: Fengguang Wu <[EMAIL PROTECTED]>

---
 Documentation/filesystems/vfs.txt |   10 ++++++----
 include/linux/fs.h                |    1 +
 mm/readahead.c                    |   12 ++++++++++++
 3 files changed, 19 insertions(+), 4 deletions(-)

--- linux-2.6.22-rc6-mm1.orig/include/linux/fs.h
+++ linux-2.6.22-rc6-mm1/include/linux/fs.h
@@ -1204,7 +1204,8 @@ struct file_operations {
        ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
        ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
        int (*revoke)(struct file *, struct address_space *);
+       unsigned long (*alter_readahead)(struct file *, pgoff_t, unsigned long, pgoff_t, unsigned long);
 };
 
 struct inode_operations {
--- linux-2.6.22-rc6-mm1.orig/mm/readahead.c
+++ linux-2.6.22-rc6-mm1/mm/readahead.c
@@ -394,7 +394,19 @@ ondemand_readahead(struct address_space 
        }
 
 readit:
+       /*
+        * Give the filesystem a chance to tweak the readahead size (e.g. to
+        * align it to a contiguous block boundary).  It should probably not
+        * change the size by more than 50%; otherwise it's really ignoring
+        * our readahead advice.
+        */
+       if (filp->f_op->alter_readahead) {
+               ra->size = filp->f_op->alter_readahead(filp, offset, req_size,
+                                                      ra->start, ra->size);
+               if (ra->async_size > ra->size)
+                       ra->async_size = ra->size;
+       }
        return ra_submit(ra, mapping, filp);
 }
 
--- linux-2.6.22-rc6-mm1.orig/Documentation/filesystems/vfs.txt
+++ linux-2.6.22-rc6-mm1/Documentation/filesystems/vfs.txt
@@ -777,11 +777,10 @@ struct file_operations {
        int (*check_flags)(int);
        int (*dir_notify)(struct file *filp, unsigned long arg);
        int (*flock) (struct file *, int, struct file_lock *);
-       ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned 
-int);
-       ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned  
-int);
+       ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
+       ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
        int (*revoke)(struct file *);
+       unsigned long (*alter_readahead)(struct file *, pgoff_t, unsigned long, pgoff_t, unsigned long);
 };
 
 Again, all methods are called without any locks being held, unless
@@ -859,6 +858,9 @@ otherwise noted.
          to an open file. This method must ensure that all currently blocked
          writes are flushed and reads will fail.
 
+  alter_readahead: called when the readahead logic is about to submit a
+                   readahead request for I/O; returns the number of pages to read
+
 Note that the file operations are implemented by the specific
 filesystem in which the inode resides. When opening a device node
 (character or block special) most filesystems will call special


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3/3] readahead: scale max readahead size depending on memory size

Reply via email to