Ok, here is a test patch.  Now, there are some instructions to go along
    with this patch, so continue reading.

    I have implemented two optimizations.  You can turn either or both (or
    neither) on with a sysctl.  I would like those interested to test all
    four combinations.  Be sure to delete any test files and 'sync' a couple
    of times between each test run so you do not skew the results.

    sysctl -w vm.msync_flush_flags=0

        No optimizations.  We don't try to sort the object flush
        (original behavior).  This is the default for this test patch.

    sysctl -w vm.msync_flush_flags=1

        Hard sequential optimization.  Attempt to locate sequential pages
        by indexing through the requested flush range, performing
        vm_page_lookup()'s.  If we miss more than a certain number in a row,
        however, we break out of the loop (otherwise this can lock up the
        system when flushing a very large multi-gigabyte or multi-terabyte
        object).

        This optimization works best when the user is msync()ing a specific
        known-to-be-mostly-dirty page range.  The only downside is that this
        can eat more cpu for other cases.  However, the upside is that for
        huge objects and small page ranges this optimization allows us to
        completely avoid scanning the object's memq, yielding an extreme
        performance benefit.

    sysctl -w vm.msync_flush_flags=2

        Soft sequential optimization during object->memq scan.  
        vm_object_page_clean() already attempts to cluster write operations
        but is limited to around 16 pages.  This optimization attempts to
        'glue' clustered ops together by looking for the next sequential
        page after the cluster that was just flushed and jumping to it for
        the next cluster.

    sysctl -w vm.msync_flush_flags=3

        This turns on both optimizations.

    I do not formally sort the object->memq.  I looked at doing so but it
    looked fairly expensive.

                                                -Matt

Index: vm/vm_object.c
===================================================================
RCS file: /home/ncvs/src/sys/vm/vm_object.c,v
retrieving revision 1.171.2.5
diff -u -r1.171.2.5 vm_object.c
--- vm/vm_object.c      3 Nov 2001 19:59:28 -0000       1.171.2.5
+++ vm/vm_object.c      22 Feb 2002 21:52:03 -0000
@@ -75,6 +75,8 @@
 #include <sys/vmmeter.h>
 #include <sys/mman.h>
 #include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
@@ -89,7 +91,18 @@
 #include <vm/vm_extern.h>
 #include <vm/vm_zone.h>
 
-static void    vm_object_qcollapse __P((vm_object_t object));
+#define EASY_SCAN_FACTOR       8
+
+#define MSYNC_FLUSH_HARDSEQUENTIAL     0x01
+#define MSYNC_FLUSH_SOFTSEQUENTIAL     0x02
+
+static int msync_flush_flags = 0;
+SYSCTL_INT(_vm, OID_AUTO, msync_flush_flags,
+        CTLFLAG_RW, &msync_flush_flags, 0, "");
+
+
+static void    vm_object_qcollapse (vm_object_t object);
+static int     vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags);
 
 /*
  *     Virtual memory objects maintain the actual data
@@ -506,21 +519,12 @@
        vm_pindex_t end;
        int flags;
 {
-       vm_page_t p, np, tp;
+       vm_page_t p, np;
        vm_offset_t tstart, tend;
        vm_pindex_t pi;
-       int s;
        struct vnode *vp;
-       int runlen;
-       int maxf;
-       int chkb;
-       int maxb;
-       int i;
        int clearobjflags;
        int pagerflags;
-       vm_page_t maf[vm_pageout_page_count];
-       vm_page_t mab[vm_pageout_page_count];
-       vm_page_t ma[vm_pageout_page_count];
        int curgeneration;
 
        if (object->type != OBJT_VNODE ||
@@ -534,6 +538,9 @@
 
        vm_object_set_flag(object, OBJ_CLEANING);
 
+       /*
+        * Handle 'entire object' case
+        */
        tstart = start;
        if (end == 0) {
                tend = object->size;
@@ -542,6 +549,72 @@
        }
 
        /*
+        * If the caller is smart and only msync()s a range he knows is
+        * dirty, we may be able to avoid an object scan.  This results in
+        * a phenomenal improvement in performance.  We cannot do this
+        * as a matter of course because the object may be huge - e.g.
+        * the size might be in the gigabytes or terabytes.
+        */
+       if (msync_flush_flags & MSYNC_FLUSH_HARDSEQUENTIAL) {
+               vm_offset_t tscan;
+               int scanlimit;
+               int scanreset;
+
+               scanreset = object->resident_page_count / EASY_SCAN_FACTOR;
+               if (scanreset < 16)
+                       scanreset = 16;
+
+               scanlimit = scanreset;
+               tscan = tstart;
+               while (tscan < tend) {
+                       curgeneration = object->generation;
+                       p = vm_page_lookup(object, tscan);
+                       if (p == NULL || p->valid == 0 ||
+                           (p->queue - p->pc) == PQ_CACHE) {
+                               if (--scanlimit == 0)
+                                       break;
+                               ++tscan;
+                               continue;
+                       }
+                       vm_page_test_dirty(p);
+                       if ((p->dirty & p->valid) == 0) {
+                               if (--scanlimit == 0)
+                                       break;
+                               ++tscan;
+                               continue;
+                       }
+                       /*
+                        * If we have been asked to skip nosync pages and 
+                        * this is a nosync page, we can't continue.
+                        */
+                       if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
+                               if (--scanlimit == 0)
+                                       break;
+                               ++tscan;
+                               continue;
+                       }
+                       scanlimit = scanreset;
+
+                       /*
+                        * This returns 0 if it was unable to busy the first
+                        * page (i.e. had to sleep).
+                        */
+                       tscan += vm_object_page_collect_flush(object, p, curgeneration, pagerflags);
+               }
+
+               /*
+                * If everything was dirty and we flushed it successfully,
+                * and the requested range is not the entire object, we
+                * don't have to mess with CLEANCHK or MIGHTBEDIRTY and can
+                * return immediately.
+                */
+               if (tscan >= tend && (tstart || tend < object->size)) {
+                       vm_object_clear_flag(object, OBJ_CLEANING);
+                       return;
+               }
+       }
+
+       /*
         * Generally set CLEANCHK interlock and make the page read-only so
         * we can then clear the object flags.
         *
@@ -578,8 +651,11 @@
        curgeneration = object->generation;
 
        for(p = TAILQ_FIRST(&object->memq); p; p = np) {
+               int n;
+
                np = TAILQ_NEXT(p, listq);
 
+again:
                pi = p->pindex;
                if (((p->flags & PG_CLEANCHK) == 0) ||
                        (pi < tstart) || (pi >= tend) ||
@@ -605,17 +681,86 @@
                        continue;
                }
 
-               s = splvm();
-               while (vm_page_sleep_busy(p, TRUE, "vpcwai")) {
-                       if (object->generation != curgeneration) {
-                               splx(s);
-                               goto rescan;
+               n = vm_object_page_collect_flush(object, p,
+                       curgeneration, pagerflags);
+               if (n == 0)
+                       goto rescan;
+               if (object->generation != curgeneration)
+                       goto rescan;
+
+               /*
+                * Try to optimize the next page.  If we can't we pick up
+                * our (random) scan where we left off.
+                */
+               if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQUENTIAL) {
+                       if ((p = vm_page_lookup(object, pi + n)) != NULL)
+                               goto again;
+               }
+       }
+
+#if 0
+       VOP_FSYNC(vp, NULL, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc);
+#endif
+
+       vm_object_clear_flag(object, OBJ_CLEANING);
+       return;
+}
+
+static int
+vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags)
+{
+       int runlen;
+       int s;
+       int maxf;
+       int chkb;
+       int maxb;
+       int i;
+       vm_pindex_t pi;
+       vm_page_t maf[vm_pageout_page_count];
+       vm_page_t mab[vm_pageout_page_count];
+       vm_page_t ma[vm_pageout_page_count];
+
+       s = splvm();
+       pi = p->pindex;
+       while (vm_page_sleep_busy(p, TRUE, "vpcwai")) {
+               if (object->generation != curgeneration) {
+                       splx(s);
+                       return(0);
+               }
+       }
+
+       maxf = 0;
+       for(i = 1; i < vm_pageout_page_count; i++) {
+               vm_page_t tp;
+
+               if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
+                       if ((tp->flags & PG_BUSY) ||
+                               (tp->flags & PG_CLEANCHK) == 0 ||
+                               (tp->busy != 0))
+                               break;
+                       if((tp->queue - tp->pc) == PQ_CACHE) {
+                               vm_page_flag_clear(tp, PG_CLEANCHK);
+                               break;
+                       }
+                       vm_page_test_dirty(tp);
+                       if ((tp->dirty & tp->valid) == 0) {
+                               vm_page_flag_clear(tp, PG_CLEANCHK);
+                               break;
                        }
+                       maf[ i - 1 ] = tp;
+                       maxf++;
+                       continue;
                }
+               break;
+       }
+
+       maxb = 0;
+       chkb = vm_pageout_page_count -  maxf;
+       if (chkb) {
+               for(i = 1; i < chkb;i++) {
+                       vm_page_t tp;
 
-               maxf = 0;
-               for(i=1;i<vm_pageout_page_count;i++) {
-                       if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
+                       if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
                                if ((tp->flags & PG_BUSY) ||
                                        (tp->flags & PG_CLEANCHK) == 0 ||
                                        (tp->busy != 0))
@@ -629,71 +774,45 @@
                                        vm_page_flag_clear(tp, PG_CLEANCHK);
                                        break;
                                }
-                               maf[ i - 1 ] = tp;
-                               maxf++;
+                               mab[ i - 1 ] = tp;
+                               maxb++;
                                continue;
                        }
                        break;
                }
+       }
 
-               maxb = 0;
-               chkb = vm_pageout_page_count -  maxf;
-               if (chkb) {
-                       for(i = 1; i < chkb;i++) {
-                               if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
-                                       if ((tp->flags & PG_BUSY) ||
-                                               (tp->flags & PG_CLEANCHK) == 0 ||
-                                               (tp->busy != 0))
-                                               break;
-                                       if((tp->queue - tp->pc) == PQ_CACHE) {
-                                               vm_page_flag_clear(tp, PG_CLEANCHK);
-                                               break;
-                                       }
-                                       vm_page_test_dirty(tp);
-                                       if ((tp->dirty & tp->valid) == 0) {
-                                               vm_page_flag_clear(tp, PG_CLEANCHK);
-                                               break;
-                                       }
-                                       mab[ i - 1 ] = tp;
-                                       maxb++;
-                                       continue;
-                               }
-                               break;
-                       }
-               }
+       for(i = 0; i < maxb; i++) {
+               int index = (maxb - i) - 1;
+               ma[index] = mab[i];
+               vm_page_flag_clear(ma[index], PG_CLEANCHK);
+       }
+       vm_page_flag_clear(p, PG_CLEANCHK);
+       ma[maxb] = p;
+       for(i = 0; i < maxf; i++) {
+               int index = (maxb + i) + 1;
+               ma[index] = maf[i];
+               vm_page_flag_clear(ma[index], PG_CLEANCHK);
+       }
+       runlen = maxb + maxf + 1;
 
-               for(i=0;i<maxb;i++) {
-                       int index = (maxb - i) - 1;
-                       ma[index] = mab[i];
-                       vm_page_flag_clear(ma[index], PG_CLEANCHK);
-               }
-               vm_page_flag_clear(p, PG_CLEANCHK);
-               ma[maxb] = p;
-               for(i=0;i<maxf;i++) {
-                       int index = (maxb + i) + 1;
-                       ma[index] = maf[i];
-                       vm_page_flag_clear(ma[index], PG_CLEANCHK);
-               }
-               runlen = maxb + maxf + 1;
-
-               splx(s);
-               vm_pageout_flush(ma, runlen, pagerflags);
-               for (i = 0; i<runlen; i++) {
-                       if (ma[i]->valid & ma[i]->dirty) {
-                               vm_page_protect(ma[i], VM_PROT_READ);
-                               vm_page_flag_set(ma[i], PG_CLEANCHK);
-                       }
+       splx(s);
+       vm_pageout_flush(ma, runlen, pagerflags);
+       for (i = 0; i < runlen; i++) {
+               if (ma[i]->valid & ma[i]->dirty) {
+                       vm_page_protect(ma[i], VM_PROT_READ);
+                       vm_page_flag_set(ma[i], PG_CLEANCHK);
+
+                       /*
+                        * maxf will end up being the actual number of pages
+                        * we wrote out contiguously, non-inclusive of the
+                        * first page.  We do not count look-behind pages.
+                        */
+                       if (i >= maxb + 1 && (maxf > i - maxb - 1))
+                               maxf = i - maxb - 1;
                }
-               if (object->generation != curgeneration)
-                       goto rescan;
        }
-
-#if 0
-       VOP_FSYNC(vp, NULL, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc);
-#endif
-
-       vm_object_clear_flag(object, OBJ_CLEANING);
-       return;
+       return(maxf + 1);
 }
 
 #ifdef not_used

To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-hackers" in the body of the message

Reply via email to