This could explain a few bug reports I've had over the years in regards
to systems leaking swap space. Good find!
Hmmm. May I suggest an alternative?
* Keep the part that changes vm->vm_refcnt == 1 to
--vm->vm_refcnt == 0 when checking whether to free
the underlying resources or not. This introduces one
extra ref count decrement.
* Instead of breaking out the vmspace freeing code or adding
any check fields, simply change the way vmspace_free() operates
so instead of checking --vm->vm_refcnt == 0, it instead checks
for vm->vm_refcnt == -1.
Old vmspace_free() code:
if (vm->vm_refcnt == 0)
panic(...)
if (--vm->vm_refcnt == 0) {
...
}
Suggested fix (pseudo code / incomplete):
/*
* One extra refcnt decrement occurs before freeing. The
* process that takes responsibility for releasing the
* vmspace resources decrements refcnt to 0, then the vmspace
* is further decremented when released from cpu_wait().
*/
if (vm->vm_refcnt <= -1)
panic(...)
if (--vm->vm_refcnt == -1) {
...
}
You might have to make other adjustments in the codebase, since there
are a couple of other places where vm_refcnt is used
(fgrep vm_refcnt */*.c). This is only a suggestion. I have not
tested it in any way. We use a similar trick for the vnode ref count.
I would be happy to review and test your final solution.
-Matt
:...
:There's a certain issue that when several processes sharing a vmspace are
:exiting at the same time, there is a race condition such that the shared
:memory is going to be lost because the check for vm->vm_refcnt being the
:check for the last decrement happening before the last decrement is
:actually performed, allowing for the possibility of Giant being dropped
:(duh, during flushing of dirty pages), and all the trouble that entails...
:
:Anyway, here's what I currently have which seems to fix it. Anyone want to
:comment on its correctness? The newly introduced vm_freer should be valid
:to test against: it's only necessary to differentiate between multiple
:holders of the same vmspace, so there shouldn't be any problem with recycled
:proc pointers or anything.
:
:Index: kern/kern_exit.c
:===================================================================
:RCS file: /usr2/ncvs/src/sys/kern/kern_exit.c,v
:retrieving revision 1.123
:diff -u -r1.123 kern_exit.c
:--- kern/kern_exit.c 2001/03/28 11:52:53 1.123
:+++ kern/kern_exit.c 2001/04/29 23:47:36
:@@ -222,13 +222,14 @@
: * Can't free the entire vmspace as the kernel stack
: * may be mapped within that space also.
: */
:- if (vm->vm_refcnt == 1) {
:+ if (--vm->vm_refcnt == 0) {
: if (vm->vm_shm)
: shmexit(p);
: pmap_remove_pages(vmspace_pmap(vm), VM_MIN_ADDRESS,
: VM_MAXUSER_ADDRESS);
: (void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS,
: VM_MAXUSER_ADDRESS);
:+ vm->vm_freer = curproc;
: }
:
: PROC_LOCK(p);
:@@ -379,7 +380,7 @@
: /*
: * Finally, call machine-dependent code to release the remaining
: * resources including address space, the kernel stack and pcb.
:- * The address space is released by "vmspace_free(p->p_vmspace)";
:+ * The address space is released by "vmspace_exitfree(p)";
: * This is machine-dependent, as we may have to change stacks
: * or ensure that the current one isn't reallocated before we
: * finish. cpu_exit will end with a call to cpu_switch(), finishing
:Index: vm/vm_map.c
:===================================================================
:RCS file: /usr2/ncvs/src/sys/vm/vm_map.c,v
:retrieving revision 1.198
:diff -u -r1.198 vm_map.c
:--- vm/vm_map.c 2001/04/12 21:50:03 1.198
:+++ vm/vm_map.c 2001/04/29 23:44:09
:@@ -178,6 +178,7 @@
: vm->vm_map.pmap = vmspace_pmap(vm); /* XXX */
: vm->vm_refcnt = 1;
: vm->vm_shm = NULL;
:+ vm->vm_freer = NULL;
: return (vm);
: }
:
:@@ -194,6 +195,27 @@
: vm_object_init2();
: }
:
:+static __inline void
:+vmspace_dofree(vm)
:+ struct vmspace *vm;
:+{
:+
:+ /*
:+ * Lock the map, to wait out all other references to it.
:+ * Delete all of the mappings and pages they hold, then call
:+ * the pmap module to reclaim anything left.
:+ */
:+ vm_map_lock(&vm->vm_map);
:+ (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
:+ vm->vm_map.max_offset);
:+ vm_map_unlock(&vm->vm_map);
:+
:+ pmap_release(vmspace_pmap(vm));
:+ vm_map_destroy(&vm->vm_map);
:+ zfree(vmspace_zone, vm);
:+}
:+
:+
: void
: vmspace_free(vm)
: struct vmspace *vm;
:@@ -202,22 +224,17 @@
: if (vm->vm_refcnt == 0)
: panic("vmspace_free: attempt to free already freed vmspace");
:
:- if (--vm->vm_refcnt == 0) {
:+ if (--vm->vm_refcnt == 0)
:+ vmspace_dofree(vm);
:+}
:
:- /*
:- * Lock the map, to wait out all other references to it.
:- * Delete all of the mappings and pages they hold, then call
:- * the pmap module to reclaim anything left.
:- */
:- vm_map_lock(&vm->vm_map);
:- (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
:- vm->vm_map.max_offset);
:- vm_map_unlock(&vm->vm_map);
:+void
:+vmspace_exitfree(p)
:+ struct proc *p;
:+{
:
:- pmap_release(vmspace_pmap(vm));
:- vm_map_destroy(&vm->vm_map);
:- zfree(vmspace_zone, vm);
:- }
:+ if (p == p->p_vmspace->vm_freer)
:+ vmspace_dofree(p->p_vmspace);
: }
:
: /*
:@@ -2128,7 +2145,7 @@
:
: vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
: bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
:- (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
:+ (caddr_t) &vm1->vm_endcopy - (caddr_t) &vm1->vm_startcopy);
: new_map = &vm2->vm_map; /* XXX */
: new_map->timestamp = 1;
:
:Index: vm/vm_map.h
:===================================================================
:RCS file: /usr2/ncvs/src/sys/vm/vm_map.h,v
:retrieving revision 1.60
:diff -u -r1.60 vm_map.h
:--- vm/vm_map.h 2001/04/13 10:22:14 1.60
:+++ vm/vm_map.h 2001/04/29 23:26:50
:@@ -192,6 +192,8 @@
: caddr_t vm_daddr; /* user virtual address of data XXX */
: caddr_t vm_maxsaddr; /* user VA at max stack growth */
: caddr_t vm_minsaddr; /* user VA at max stack growth */
:+#define vm_endcopy vm_freer
:+ struct proc *vm_freer; /* vm freed on whose behalf */
: };
:
: /*
:Index: vm/vm_extern.h
:===================================================================
:RCS file: /usr2/ncvs/src/sys/vm/vm_extern.h,v
:retrieving revision 1.47
:diff -u -r1.47 vm_extern.h
:--- vm/vm_extern.h 2000/03/13 10:47:24 1.47
:+++ vm/vm_extern.h 2001/04/29 23:29:09
:@@ -90,6 +90,7 @@
: void vmspace_exec __P((struct proc *));
: void vmspace_unshare __P((struct proc *));
: void vmspace_free __P((struct vmspace *));
:+void vmspace_exitfree __P((struct proc *));
: void vnode_pager_setsize __P((struct vnode *, vm_ooffset_t));
: void vslock __P((caddr_t, u_int));
: void vsunlock __P((caddr_t, u_int));
:Index: alpha/alpha/vm_machdep.c
:===================================================================
:RCS file: /usr2/ncvs/src/sys/alpha/alpha/vm_machdep.c,v
:retrieving revision 1.47
:diff -u -r1.47 vm_machdep.c
:--- alpha/alpha/vm_machdep.c 2001/03/15 02:32:26 1.47
:+++ alpha/alpha/vm_machdep.c 2001/04/29 23:29:59
:@@ -273,7 +273,7 @@
: pmap_dispose_proc(p);
:
: /* and clean-out the vmspace */
:- vmspace_free(p->p_vmspace);
:+ vmspace_exitfree(p);
: }
:
: /*
:Index: i386/i386/vm_machdep.c
:===================================================================
:RCS file: /usr2/ncvs/src/sys/i386/i386/vm_machdep.c,v
:retrieving revision 1.154
:diff -u -r1.154 vm_machdep.c
:--- i386/i386/vm_machdep.c 2001/03/07 03:20:14 1.154
:+++ i386/i386/vm_machdep.c 2001/04/29 23:30:06
:@@ -282,7 +282,7 @@
: pmap_dispose_proc(p);
:
: /* and clean-out the vmspace */
:- vmspace_free(p->p_vmspace);
:+ vmspace_exitfree(p);
: }
:
: /*
:Index: ia64/ia64/vm_machdep.c
:===================================================================
:RCS file: /usr2/ncvs/src/sys/ia64/ia64/vm_machdep.c,v
:retrieving revision 1.16
:diff -u -r1.16 vm_machdep.c
:--- ia64/ia64/vm_machdep.c 2001/03/07 03:20:15 1.16
:+++ ia64/ia64/vm_machdep.c 2001/04/29 23:30:16
:@@ -324,7 +324,7 @@
: pmap_dispose_proc(p);
:
: /* and clean-out the vmspace */
:- vmspace_free(p->p_vmspace);
:+ vmspace_exitfree(p);
: }
:
: /*
:
:
:--
: Brian Fundakowski Feldman \ FreeBSD: The Power to Serve! /
: [EMAIL PROTECTED] `------------------------------'
To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-hackers" in the body of the message