This could explain a few bug reports I've had over the years in regards
    to systems leaking swap space.  Good find!

    Hmmm.  May I suggest an alternative?

        * Keep the part that changes vm->vm_refcnt == 1 to
          --vm->vm_refcnt == 0 when checking whether to free
          the underlying resources or not.  This introduces one
          extra ref count decrement.

        * Instead of breaking out the vmspace freeing code or adding
          any check fields, simply change the way vmspace_free() operates
          so instead of checking --vm->vm_refcnt == 0, it instead checks
          for vm->vm_refcnt == -1.

    Old vmspace_free() code:

        if (vm->vm_refcnt == 0)
                panic(...)

        if (--vm->vm_refcnt == 0) {
                ...
        }

    Suggested fix (pseudo code / incomplete):

        /*
         * One extra refcnt decrement occurs before freeing.  The
         * process that takes responsibility for releasing the 
         * vmspace resources decrements refcnt to 0, then the vmspace
         * is further decremented when released from cpu_wait().
         */
        if (vm->vm_refcnt <= -1)
                panic(...)

        if (--vm->vm_refcnt == -1) {
                ...
        }

    You might have to make other adjustments in the codebase, since there
    are a couple of other places where vm_refcnt is used 
    (fgrep vm_refcnt */*.c).  This is only a suggestion.  I have not
    tested it in any way.  We use a similar trick for the vnode ref count.

    I would be happy to review and test your final solution.

                                                -Matt

:...
:There's a certain issue that when several processes sharing a vmspace are 
:exiting at the same time, there is a race condition such that the shared 
:memory is going to be lost because the check for vm->vm_refcnt being the 
:check for the last decrement happening before the last decrement is
:actually performed, allowing for the possibility of Giant being dropped 
:(duh, during flushing of dirty pages), and all the trouble that entails...
:
:Anyway, here's what I currently have which seems to fix it.  Anyone want to 
:comment on its correctness?  The newly introduced vm_freer should be valid 
:to test against: it's only necessary to differentiate between multiple 
:holders of the same vmspace, so there shouldn't be any problem with recycled 
:proc pointers or anything.
:
:Index: kern/kern_exit.c
:===================================================================
:RCS file: /usr2/ncvs/src/sys/kern/kern_exit.c,v
:retrieving revision 1.123
:diff -u -r1.123 kern_exit.c
:--- kern/kern_exit.c   2001/03/28 11:52:53     1.123
:+++ kern/kern_exit.c   2001/04/29 23:47:36
:@@ -222,13 +222,14 @@
:        * Can't free the entire vmspace as the kernel stack
:        * may be mapped within that space also.
:        */
:-      if (vm->vm_refcnt == 1) {
:+      if (--vm->vm_refcnt == 0) {
:               if (vm->vm_shm)
:                       shmexit(p);
:               pmap_remove_pages(vmspace_pmap(vm), VM_MIN_ADDRESS,
:                   VM_MAXUSER_ADDRESS);
:               (void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS,
:                   VM_MAXUSER_ADDRESS);
:+              vm->vm_freer = curproc;
:       }
: 
:       PROC_LOCK(p);
:@@ -379,7 +380,7 @@
:       /*
:        * Finally, call machine-dependent code to release the remaining
:        * resources including address space, the kernel stack and pcb.
:-       * The address space is released by "vmspace_free(p->p_vmspace)";
:+       * The address space is released by "vmspace_exitfree(p)";
:        * This is machine-dependent, as we may have to change stacks
:        * or ensure that the current one isn't reallocated before we
:        * finish.  cpu_exit will end with a call to cpu_switch(), finishing
:Index: vm/vm_map.c
:===================================================================
:RCS file: /usr2/ncvs/src/sys/vm/vm_map.c,v
:retrieving revision 1.198
:diff -u -r1.198 vm_map.c
:--- vm/vm_map.c        2001/04/12 21:50:03     1.198
:+++ vm/vm_map.c        2001/04/29 23:44:09
:@@ -178,6 +178,7 @@
:       vm->vm_map.pmap = vmspace_pmap(vm);             /* XXX */
:       vm->vm_refcnt = 1;
:       vm->vm_shm = NULL;
:+      vm->vm_freer = NULL;
:       return (vm);
: }
: 
:@@ -194,6 +195,27 @@
:       vm_object_init2();
: }
: 
:+static __inline void
:+vmspace_dofree(vm)
:+      struct vmspace *vm;
:+{
:+
:+      /*
:+       * Lock the map, to wait out all other references to it.
:+       * Delete all of the mappings and pages they hold, then call
:+       * the pmap module to reclaim anything left.
:+       */
:+      vm_map_lock(&vm->vm_map);
:+      (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
:+              vm->vm_map.max_offset);
:+      vm_map_unlock(&vm->vm_map);
:+
:+      pmap_release(vmspace_pmap(vm));
:+      vm_map_destroy(&vm->vm_map);
:+      zfree(vmspace_zone, vm);
:+}
:+      
:+
: void
: vmspace_free(vm)
:       struct vmspace *vm;
:@@ -202,22 +224,17 @@
:       if (vm->vm_refcnt == 0)
:               panic("vmspace_free: attempt to free already freed vmspace");
: 
:-      if (--vm->vm_refcnt == 0) {
:+      if (--vm->vm_refcnt == 0)
:+              vmspace_dofree(vm);
:+}
: 
:-              /*
:-               * Lock the map, to wait out all other references to it.
:-               * Delete all of the mappings and pages they hold, then call
:-               * the pmap module to reclaim anything left.
:-               */
:-              vm_map_lock(&vm->vm_map);
:-              (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
:-                  vm->vm_map.max_offset);
:-              vm_map_unlock(&vm->vm_map);
:+void
:+vmspace_exitfree(p)
:+      struct proc *p;
:+{
: 
:-              pmap_release(vmspace_pmap(vm));
:-              vm_map_destroy(&vm->vm_map);
:-              zfree(vmspace_zone, vm);
:-      }
:+      if (p == p->p_vmspace->vm_freer)
:+              vmspace_dofree(p->p_vmspace);
: }
: 
: /*
:@@ -2128,7 +2145,7 @@
: 
:       vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
:       bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
:-          (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
:+          (caddr_t) &vm1->vm_endcopy - (caddr_t) &vm1->vm_startcopy);
:       new_map = &vm2->vm_map; /* XXX */
:       new_map->timestamp = 1;
: 
:Index: vm/vm_map.h
:===================================================================
:RCS file: /usr2/ncvs/src/sys/vm/vm_map.h,v
:retrieving revision 1.60
:diff -u -r1.60 vm_map.h
:--- vm/vm_map.h        2001/04/13 10:22:14     1.60
:+++ vm/vm_map.h        2001/04/29 23:26:50
:@@ -192,6 +192,8 @@
:       caddr_t vm_daddr;       /* user virtual address of data XXX */
:       caddr_t vm_maxsaddr;    /* user VA at max stack growth */
:       caddr_t vm_minsaddr;    /* user VA at max stack growth */
:+#define       vm_endcopy vm_freer
:+      struct proc *vm_freer;  /* vm freed on whose behalf */
: };
: 
: /*
:Index: vm/vm_extern.h
:===================================================================
:RCS file: /usr2/ncvs/src/sys/vm/vm_extern.h,v
:retrieving revision 1.47
:diff -u -r1.47 vm_extern.h
:--- vm/vm_extern.h     2000/03/13 10:47:24     1.47
:+++ vm/vm_extern.h     2001/04/29 23:29:09
:@@ -90,6 +90,7 @@
: void vmspace_exec __P((struct proc *));
: void vmspace_unshare __P((struct proc *));
: void vmspace_free __P((struct vmspace *));
:+void vmspace_exitfree __P((struct proc *));
: void vnode_pager_setsize __P((struct vnode *, vm_ooffset_t));
: void vslock __P((caddr_t, u_int));
: void vsunlock __P((caddr_t, u_int));
:Index: alpha/alpha/vm_machdep.c
:===================================================================
:RCS file: /usr2/ncvs/src/sys/alpha/alpha/vm_machdep.c,v
:retrieving revision 1.47
:diff -u -r1.47 vm_machdep.c
:--- alpha/alpha/vm_machdep.c   2001/03/15 02:32:26     1.47
:+++ alpha/alpha/vm_machdep.c   2001/04/29 23:29:59
:@@ -273,7 +273,7 @@
:       pmap_dispose_proc(p);
: 
:       /* and clean-out the vmspace */
:-      vmspace_free(p->p_vmspace);
:+      vmspace_exitfree(p);
: }
: 
: /*
:Index: i386/i386/vm_machdep.c
:===================================================================
:RCS file: /usr2/ncvs/src/sys/i386/i386/vm_machdep.c,v
:retrieving revision 1.154
:diff -u -r1.154 vm_machdep.c
:--- i386/i386/vm_machdep.c     2001/03/07 03:20:14     1.154
:+++ i386/i386/vm_machdep.c     2001/04/29 23:30:06
:@@ -282,7 +282,7 @@
:       pmap_dispose_proc(p);
: 
:       /* and clean-out the vmspace */
:-      vmspace_free(p->p_vmspace);
:+      vmspace_exitfree(p);
: }
: 
: /*
:Index: ia64/ia64/vm_machdep.c
:===================================================================
:RCS file: /usr2/ncvs/src/sys/ia64/ia64/vm_machdep.c,v
:retrieving revision 1.16
:diff -u -r1.16 vm_machdep.c
:--- ia64/ia64/vm_machdep.c     2001/03/07 03:20:15     1.16
:+++ ia64/ia64/vm_machdep.c     2001/04/29 23:30:16
:@@ -324,7 +324,7 @@
:       pmap_dispose_proc(p);
: 
:       /* and clean-out the vmspace */
:-      vmspace_free(p->p_vmspace);
:+      vmspace_exitfree(p);
: }
: 
: /*
:
:
:-- 
: Brian Fundakowski Feldman           \  FreeBSD: The Power to Serve!  /
: [EMAIL PROTECTED]                    `------------------------------'

To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-hackers" in the body of the message

Reply via email to