Hey everyone,
I spent some time working on getting cdjones' memory limit patches
updated for 7.0 and beyond and thought I'd post my progress. I've
attached my current patch which implements memory limits on 7.0-RELEASE,
but only for the older 4BSD scheduler (the default in -RELEASE); it won't
work at all on ULE. I haven't yet started work for ULE or getting CPU
sharing working. This patch also includes fixes for problems in the
original cdjones patches. If you want to give it a whirl, it should apply
cleanly to a 7.0-RELEASE source tree; if you run into any issues, let
me know.
- Chris
diff -burN src.old/lib/libc/sys/Symbol.map src.new/lib/libc/sys/Symbol.map
--- src.old/lib/libc/sys/Symbol.map 2007-08-21 21:56:35.000000000 -0400
+++ src.new/lib/libc/sys/Symbol.map 2008-05-28 19:55:04.000000000 -0400
@@ -131,6 +131,7 @@
issetugid;
jail;
jail_attach;
+ jail_set_resource_limits;
kenv;
kevent;
kill;
@@ -580,6 +581,8 @@
__sys_jail;
_jail_attach;
__sys_jail_attach;
+ _jail_set_resource_limits;
+ __sys_jail_set_resource_limits;
_kenv;
__sys_kenv;
_kevent;
diff -burN src.old/sys/kern/init_sysent.c src.new/sys/kern/init_sysent.c
--- src.old/sys/kern/init_sysent.c 2007-08-16 01:32:25.000000000 -0400
+++ src.new/sys/kern/init_sysent.c 2008-05-28 19:49:37.000000000 -0400
@@ -2,8 +2,8 @@
* System call switch table.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/kern/init_sysent.c,v 1.230 2007/08/16 05:32:25 davidxu Exp $
- * created from FreeBSD: src/sys/kern/syscalls.master,v 1.232 2007/07/04 22:47:37 peter Exp
+ * $FreeBSD$
+ * created from FreeBSD: src/sys/kern/syscalls.master,v 1.233 2007/08/16 05:26:41 davidxu Exp
*/
#include "opt_compat.h"
@@ -511,4 +511,5 @@
{ AS(truncate_args), (sy_call_t *)truncate, AUE_TRUNCATE, NULL, 0, 0 }, /* 479 = truncate */
{ AS(ftruncate_args), (sy_call_t *)ftruncate, AUE_FTRUNCATE, NULL, 0, 0 }, /* 480 = ftruncate */
{ AS(thr_kill2_args), (sy_call_t *)thr_kill2, AUE_KILL, NULL, 0, 0 }, /* 481 = thr_kill2 */
+ { AS(jail_set_resource_limits_args), (sy_call_t *)jail_set_resource_limits, AUE_NULL, NULL, 0, 0 }, /* 482 = jail_set_resource_limits */
};
diff -burN src.old/sys/kern/kern_jail.c src.new/sys/kern/kern_jail.c
--- src.old/sys/kern/kern_jail.c 2007-04-13 19:54:22.000000000 -0400
+++ src.new/sys/kern/kern_jail.c 2008-06-19 03:16:43.000000000 -0400
@@ -5,8 +5,38 @@
* can do whatever you want with this stuff. If we meet some day, and you think
* this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
* ----------------------------------------------------------------------------
+ *
+ * Portions copyright (c) 2006 Chris Jones,
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Chris Jones
+ * thanks to the support of Google's Summer of Code program and
+ * mentoring by Kip Macy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
*/
+
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/sys/kern/kern_jail.c,v 1.70 2007/04/13 23:54:22 pjd Exp $");
@@ -15,6 +45,7 @@
#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/sysproto.h>
@@ -33,6 +64,12 @@
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_object.h>
+#include <vm/vm_map.h>
+#include <vm/vm_pageout.h>
#include <net/if.h>
#include <netinet/in.h>
@@ -78,12 +115,27 @@
&jail_mount_allowed, 0,
"Processes in jail can mount/unmount jail-friendly file systems");
+/*
+ * security.jail.limit_jail_memory: master switch for jail memory limits.
+ * The per-jail pager thread only enforces a limit while this is non-zero.
+ */
+int jail_limit_memory = 0;
+SYSCTL_INT(_security_jail, OID_AUTO, limit_jail_memory, CTLFLAG_RW,
+ &jail_limit_memory, 0,
+ "Limit jails' memory usage");
+
+/*
+ * security.jail.jail_pager_interval: the pager thread multiplies this by hz
+ * for its sleep timeout, so the value is an interval in seconds.
+ * NOTE(review): SYSCTL_INT presumably supplies CTLTYPE_INT itself, making
+ * the explicit flag below redundant -- confirm against <sys/sysctl.h>.
+ */
+int jail_memory_pager_interval = 5;
+SYSCTL_INT(_security_jail, OID_AUTO, jail_pager_interval,
+ CTLTYPE_INT | CTLFLAG_RW,
+ &jail_memory_pager_interval, 0,
+ "Interval between jail memory limit checks");
+
+
/* allprison, lastprid, and prisoncount are protected by allprison_lock. */
struct prisonlist allprison;
struct sx allprison_lock;
int lastprid = 0;
int prisoncount = 0;
+/* Make the sched_lock visible */
+extern struct mtx sched_lock;
+
/*
* List of jail services. Protected by allprison_lock.
*/
@@ -114,6 +166,104 @@
SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
+/*
+ * Per-jail memory pager thread, started with the prison as its argument.
+ *
+ * Each pass records the jail's resident usage in pr_mem_usage and, when the
+ * usage exceeds pr_mem_limit, asks the VM to deactivate pages of every
+ * eligible process in the jail.  The thread then sleeps on the prison
+ * address for jail_memory_pager_interval seconds, or until it is woken
+ * with wakeup(pr).  It exits once J_PAGER_TD_DIE has been stored through
+ * pr_pager_flags_ptr.
+ *
+ * NOTE(review): the control word lives on this thread's stack, so
+ * pr_pager_flags_ptr is unset until the thread first runs and dangles
+ * once the thread has exited; writers must cope with both -- confirm.
+ * NOTE(review): taking sched_lock directly ties this code to the 4BSD
+ * scheduler.
+ */
+static void
+jpager_td(void *arg)
+{
+	struct proc *p;
+	struct prison *pr = arg;
+	struct thread *td;
+	long limit, cursize, newsize, usage;
+	int breakout;
+	int flags = J_PAGER_TD_ACTIVE;
+
+	/* Publish the control word so other threads can ask us to exit. */
+	pr->pr_pager_flags_ptr = &flags;
+
+	for (;;) {
+		if (flags & J_PAGER_TD_DIE)
+			break;
+
+		if (jail_limit_memory && pr->pr_mem_limit) {
+			/*
+			 * TODO: consider whether it might be better to start
+			 * pushing back when we approach the limit, rather than
+			 * when we hit it.
+			 */
+			limit = prison_memory_limit(pr);
+			usage = prison_memory(pr);
+
+			/* Copy the current memory usage to the prison struct */
+			mtx_lock(&pr->pr_mtx);
+			pr->pr_mem_usage = usage;
+			mtx_unlock(&pr->pr_mtx);
+
+			/*
+			 * The logic from vm_daemon() really needs to go here.
+			 * Problem: we want to push things below their rlimits,
+			 * and vm_daemon doesn't do that.  It'd be better to
+			 * refactor vm_daemon to fit, but this'll do for now.
+			 */
+			if ((usage - limit) > 0) {
+				sx_slock(&allproc_lock);
+				LIST_FOREACH(p, &allproc, p_list) {
+					/*
+					 * The credential and flags are only
+					 * stable under the process lock, so
+					 * test jail membership after taking
+					 * it, as prison_memory() does.
+					 */
+					PROC_LOCK(p);
+					if (pr != p->p_ucred->cr_prison ||
+					    !p->p_vmspace ||
+					    (p->p_flag & (P_SYSTEM | P_WEXIT))) {
+						PROC_UNLOCK(p);
+						continue;
+					}
+
+					/*
+					 * Skip the process if any thread is
+					 * neither queued, running nor asleep.
+					 */
+					mtx_lock_spin(&sched_lock);
+					breakout = 0;
+					FOREACH_THREAD_IN_PROC(p, td) {
+						if (!TD_ON_RUNQ(td) &&
+						    !TD_IS_RUNNING(td) &&
+						    !TD_IS_SLEEPING(td)) {
+							breakout = 1;
+							break;
+						}
+					}
+					mtx_unlock_spin(&sched_lock);
+					if (breakout) {
+						PROC_UNLOCK(p);
+						continue;
+					}
+
+					/*
+					 * NOTE: we differ here from vm_daemon b/c we
+					 * don't care about the rlimit; things that
+					 * are exceeding that will get caught in due
+					 * course.  We need, however, to decrease the
+					 * pressure on our permitted memory
+					 * allocation.  Fortunately, we only care
+					 * about eventually hitting the limit, so if
+					 * we don't get there right away, it's okay.
+					 */
+
+					/*
+					 * TODO: this arbitrarily reduces each
+					 * process's space by 6.25% (until it's
+					 * completely swapped out) while we're under
+					 * memory pressure.  A better way would be to
+					 * either hit large processes first, or to
+					 * hit the least-active processes first, or
+					 * go proportionally, or ....
+					 */
+					newsize = cursize =
+					    vmspace_resident_count(p->p_vmspace);
+					newsize -= newsize / 16;
+					/* Defensive clamp for a negative count. */
+					if (cursize < 0)
+						newsize = 0;
+					PROC_UNLOCK(p);
+					vm_pageout_map_deactivate_pages(
+					    &p->p_vmspace->vm_map, newsize);
+				} /* end LIST_FOREACH procs */
+				sx_sunlock(&allproc_lock);
+			}
+		}
+		tsleep(pr, 0, "-", jail_memory_pager_interval * hz);
+	}
+
+	kthread_exit(0);
+}
+
/*
* struct jail_args {
* struct jail *jail;
@@ -127,6 +277,7 @@
struct prison_service *psrv;
struct jail j;
struct jail_attach_args jaa;
+ struct proc *j_pager_proc = NULL;
int vfslocked, error, tryprid;
error = copyin(uap->jail, &j, sizeof(j));
@@ -135,6 +286,7 @@
if (j.version != 0)
return (EINVAL);
+
MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
pr->pr_ref = 1;
@@ -156,7 +308,10 @@
goto e_dropvnref;
pr->pr_ip = j.ip_number;
pr->pr_linux = NULL;
+ pr->pr_sched_shares = j.sched_shares;
pr->pr_securelevel = securelevel;
+ pr->pr_mem_limit = j.mem_limit;
+
if (prison_service_slots == 0)
pr->pr_slots = NULL;
else {
@@ -169,6 +324,7 @@
tryprid = lastprid + 1;
if (tryprid == JAIL_MAX)
tryprid = 1;
+
next:
LIST_FOREACH(tpr, &allprison, pr_list) {
if (tpr->pr_id == tryprid) {
@@ -190,6 +346,11 @@
}
sx_sunlock(&allprison_lock);
+ if (kthread_create(jpager_td, pr, (void *) j_pager_proc, 0, 0, "jpager %d", pr->pr_id))
+ goto e_dropprref;
+ KASSERT(j_pager_proc != NULL, ("NULL j_pager_proc"));
+ pr->pr_pager = j_pager_proc;
+
error = jail_attach(td, &jaa);
if (error)
goto e_dropprref;
@@ -199,6 +360,11 @@
td->td_retval[0] = jaa.jid;
return (0);
e_dropprref:
+ if (j_pager_proc != NULL) {
+ *pr->pr_pager_flags_ptr = J_PAGER_TD_DIE;
+ wakeup(pr);
+ }
+
sx_xlock(&allprison_lock);
LIST_REMOVE(pr, pr_list);
prisoncount--;
@@ -267,11 +433,13 @@
newcred = crget();
PROC_LOCK(p);
+
oldcred = p->p_ucred;
setsugid(p);
crcopy(newcred, oldcred);
newcred->cr_prison = pr;
p->p_ucred = newcred;
+
PROC_UNLOCK(p);
crfree(oldcred);
return (0);
@@ -314,6 +482,9 @@
pr->pr_ref--;
if (pr->pr_ref == 0) {
mtx_unlock(&pr->pr_mtx);
+ *pr->pr_pager_flags_ptr = J_PAGER_TD_DIE;
+ wakeup(pr);
+
TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
return;
@@ -436,6 +607,92 @@
return (ok);
}
+/*
+ * Given a prison, return the resident memory used by its processes,
+ * in bytes.
+ *
+ * Takes allproc_lock shared for the walk, so the caller must not already
+ * hold it exclusively.
+ */
+long
+prison_memory(struct prison *pr)
+{
+	struct proc *p;
+	long mem_used = 0;
+
+	/*
+	 * TODO: this is a really bad way of doing the
+	 * search, as we end up going across all processes
+	 * for each jail.  It'd be more efficient to just do
+	 * this once in a period and update the relevant jail.
+	 */
+	sx_slock(&allproc_lock);
+	FOREACH_PROC_IN_SYSTEM(p) {
+		/* The credential is read under the process lock. */
+		PROC_LOCK(p);
+		if (jailed(p->p_ucred) &&
+		    pr == p->p_ucred->cr_prison &&
+		    p->p_vmspace)
+			mem_used += vmspace_resident_count(p->p_vmspace);
+		PROC_UNLOCK(p);
+	}
+	sx_sunlock(&allproc_lock);
+
+	/* The resident counts are in pages; convert to bytes. */
+	mem_used *= PAGE_SIZE;
+	return (mem_used);
+}
+
+/*
+ * Given a prison, return its permitted memory usage in bytes
+ * (0 means the prison has no limit set).
+ */
+long
+prison_memory_limit(struct prison *pr)
+{
+	long memlimit;
+
+	/* pr_mem_limit is a byte count protected by the prison mutex. */
+	mtx_lock(&pr->pr_mtx);
+	memlimit = (long)pr->pr_mem_limit;
+	mtx_unlock(&pr->pr_mtx);
+	return (memlimit);
+}
+
+/*
+ * Change resource limits for a prison.
+ *
+ * unsigned int jid: id of jail to mess with
+ *
+ * int cpushares:  0 -> remove prison from cpu limits
+ *                -1 -> don't change existing shares
+ *                >0 -> set cpu shares
+ *
+ * int memlimit:   0 -> remove prison from mem limits
+ *                -1 -> don't change existing limit
+ *                >0 -> set memory limit (bytes)
+ *
+ * Returns 0 on success, the suser() error for an unprivileged caller,
+ * or EINVAL when a value is below -1 or no prison has the given jid.
+ *
+ * TODO: might this be better handled via a writable
+ * sysctl than with a new syscall?
+ */
+int
+jail_set_resource_limits(struct thread *td, struct jail_set_resource_limits_args *uap)
+{
+	struct prison *pr;
+	int error;
+
+	error = suser(td);
+	if (error)
+		return (error);
+
+	/*
+	 * -1 is the only negative value with a meaning; anything lower
+	 * would be stored into unsigned fields as a huge limit.
+	 */
+	if (uap->cpushares < -1 || uap->memlimit < -1)
+		return (EINVAL);
+
+	/* The list is only searched, so a shared lock is sufficient. */
+	sx_slock(&allprison_lock);
+	LIST_FOREACH(pr, &allprison, pr_list) {
+		if (pr->pr_id == uap->jid)
+			break;
+	}
+	if (pr == NULL) {
+		sx_sunlock(&allprison_lock);
+		return (EINVAL);
+	}
+
+	mtx_lock(&pr->pr_mtx);
+	if (uap->cpushares != -1)
+		pr->pr_sched_shares = uap->cpushares;
+	if (uap->memlimit != -1)
+		pr->pr_mem_limit = uap->memlimit;
+	mtx_unlock(&pr->pr_mtx);
+	sx_sunlock(&allprison_lock);
+	return (0);
+}
+
/*
* Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
*/
@@ -955,9 +1212,15 @@
xp->pr_id = pr->pr_id;
xp->pr_ip = pr->pr_ip;
strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path));
+
mtx_lock(&pr->pr_mtx);
+ xp->pr_sched_shares = pr->pr_sched_shares;
+ xp->pr_estcpu = pr->pr_estcpu;
+ xp->pr_mem_limit = pr->pr_mem_limit;
+ xp->pr_mem_usage = pr->pr_mem_usage;
strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host));
mtx_unlock(&pr->pr_mtx);
+
xp++;
}
sx_sunlock(&allprison_lock);
diff -burN src.old/sys/kern/syscalls.c src.new/sys/kern/syscalls.c
--- src.old/sys/kern/syscalls.c 2007-08-16 01:32:26.000000000 -0400
+++ src.new/sys/kern/syscalls.c 2008-05-28 19:49:37.000000000 -0400
@@ -2,8 +2,8 @@
* System call names.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/kern/syscalls.c,v 1.214 2007/08/16 05:32:26 davidxu Exp $
- * created from FreeBSD: src/sys/kern/syscalls.master,v 1.232 2007/07/04 22:47:37 peter Exp
+ * $FreeBSD$
+ * created from FreeBSD: src/sys/kern/syscalls.master,v 1.233 2007/08/16 05:26:41 davidxu Exp
*/
const char *syscallnames[] = {
@@ -489,4 +489,5 @@
"truncate", /* 479 = truncate */
"ftruncate", /* 480 = ftruncate */
"thr_kill2", /* 481 = thr_kill2 */
+ "jail_set_resource_limits", /* 482 = jail_set_resource_limits */
};
diff -burN src.old/sys/kern/syscalls.master src.new/sys/kern/syscalls.master
--- src.old/sys/kern/syscalls.master 2007-08-16 01:26:41.000000000 -0400
+++ src.new/sys/kern/syscalls.master 2008-05-28 11:03:25.000000000 -0400
@@ -847,5 +847,7 @@
479 AUE_TRUNCATE STD { int truncate(char *path, off_t length); }
480 AUE_FTRUNCATE STD { int ftruncate(int fd, off_t length); }
481 AUE_KILL STD { int thr_kill2(pid_t pid, long id, int sig); }
+482 AUE_NULL STD { int jail_set_resource_limits(unsigned int jid, \
+ int cpushares, int memlimit); }
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
diff -burN src.old/sys/kern/systrace_args.c src.new/sys/kern/systrace_args.c
--- src.old/sys/kern/systrace_args.c 2007-08-16 01:32:26.000000000 -0400
+++ src.new/sys/kern/systrace_args.c 2008-05-28 19:49:37.000000000 -0400
@@ -2,7 +2,7 @@
* System call argument to DTrace register array converstion.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/kern/systrace_args.c,v 1.14 2007/08/16 05:32:26 davidxu Exp $
+ * $FreeBSD$
* This file is part of the DTrace syscall provider.
*/
@@ -2871,6 +2871,15 @@
*n_args = 3;
break;
}
+ /* jail_set_resource_limits */
+ case 482: {
+ struct jail_set_resource_limits_args *p = params;
+ uarg[0] = p->jid; /* unsigned int */
+ iarg[1] = p->cpushares; /* int */
+ iarg[2] = p->memlimit; /* int */
+ *n_args = 3;
+ break;
+ }
default:
*n_args = 0;
break;
diff -burN src.old/sys/sys/jail.h src.new/sys/sys/jail.h
--- src.old/sys/sys/jail.h 2007-04-05 19:19:13.000000000 -0400
+++ src.new/sys/sys/jail.h 2008-05-28 09:35:21.000000000 -0400
@@ -18,6 +18,8 @@
char *path;
char *hostname;
u_int32_t ip_number;
+ unsigned int sched_shares;
+ unsigned int mem_limit;
};
struct xprison {
@@ -26,13 +28,24 @@
char pr_path[MAXPATHLEN];
char pr_host[MAXHOSTNAMELEN];
u_int32_t pr_ip;
+ unsigned int pr_sched_shares;
+ unsigned int pr_estcpu;
+ unsigned int pr_mem_limit;
+ unsigned int pr_mem_usage;
};
-#define XPRISON_VERSION 1
+#define XPRISON_VERSION 2
+
+#define JAIL_MINIMUM_SHARES 1
+
+#define J_PAGER_TD_ACTIVE 0x01
+#define J_PAGER_TD_DIE 0x02
+#define J_PAGER_TD_DEAD 0x04
#ifndef _KERNEL
int jail(struct jail *);
int jail_attach(int);
+int jail_set_resource_limits(unsigned int, int, int);
#else /* _KERNEL */
@@ -73,6 +86,12 @@
int pr_securelevel; /* (p) securelevel */
struct task pr_task; /* (d) destroy task */
struct mtx pr_mtx;
+ u_int32_t pr_sched_shares; /* (p) jail priority */
+ u_int pr_estcpu; /* (p) est. cpu of jail */
+ struct proc *pr_pager; /* (c) pager pid */
+ int *pr_pager_flags_ptr; /* (p) communication to pager */
+ size_t pr_mem_limit; /* (p) memory allocation limit */
+ size_t pr_mem_usage; /* (p) memory in use */
void **pr_slots; /* (p) additional data */
};
#endif /* _KERNEL || _WANT_PRISON */
@@ -113,6 +132,8 @@
void prison_hold(struct prison *pr);
int prison_if(struct ucred *cred, struct sockaddr *sa);
int prison_ip(struct ucred *cred, int flag, u_int32_t *ip);
+long prison_memory(struct prison *pr);
+long prison_memory_limit(struct prison *pr);
int prison_priv_check(struct ucred *cred, int priv);
void prison_remote_ip(struct ucred *cred, int flags, u_int32_t *ip);
diff -burN src.old/sys/sys/syscall.h src.new/sys/sys/syscall.h
--- src.old/sys/sys/syscall.h 2007-08-16 01:32:26.000000000 -0400
+++ src.new/sys/sys/syscall.h 2008-05-28 19:49:37.000000000 -0400
@@ -2,8 +2,8 @@
* System call numbers.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/sys/syscall.h,v 1.211 2007/08/16 05:32:26 davidxu Exp $
- * created from FreeBSD: src/sys/kern/syscalls.master,v 1.232 2007/07/04 22:47:37 peter Exp
+ * $FreeBSD$
+ * created from FreeBSD: src/sys/kern/syscalls.master,v 1.233 2007/08/16 05:26:41 davidxu Exp
*/
#define SYS_syscall 0
@@ -401,4 +401,5 @@
#define SYS_truncate 479
#define SYS_ftruncate 480
#define SYS_thr_kill2 481
-#define SYS_MAXSYSCALL 482
+#define SYS_jail_set_resource_limits 482
+#define SYS_MAXSYSCALL 483
diff -burN src.old/sys/sys/syscall.mk src.new/sys/sys/syscall.mk
--- src.old/sys/sys/syscall.mk 2007-08-16 01:32:26.000000000 -0400
+++ src.new/sys/sys/syscall.mk 2008-05-28 19:49:37.000000000 -0400
@@ -1,7 +1,7 @@
# FreeBSD system call names.
# DO NOT EDIT-- this file is automatically generated.
-# $FreeBSD: src/sys/sys/syscall.mk,v 1.166 2007/08/16 05:32:26 davidxu Exp $
-# created from FreeBSD: src/sys/kern/syscalls.master,v 1.232 2007/07/04 22:47:37 peter Exp
+# $FreeBSD$
+# created from FreeBSD: src/sys/kern/syscalls.master,v 1.233 2007/08/16 05:26:41 davidxu Exp
MIASM = \
syscall.o \
exit.o \
@@ -349,4 +349,5 @@
lseek.o \
truncate.o \
ftruncate.o \
- thr_kill2.o
+ thr_kill2.o \
+ jail_set_resource_limits.o
diff -burN src.old/sys/sys/sysproto.h src.new/sys/sys/sysproto.h
--- src.old/sys/sys/sysproto.h 2007-08-16 01:32:26.000000000 -0400
+++ src.new/sys/sys/sysproto.h 2008-05-28 19:49:37.000000000 -0400
@@ -2,8 +2,8 @@
* System call prototypes.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/sys/sysproto.h,v 1.215 2007/08/16 05:32:26 davidxu Exp $
- * created from FreeBSD: src/sys/kern/syscalls.master,v 1.232 2007/07/04 22:47:37 peter Exp
+ * $FreeBSD$
+ * created from FreeBSD: src/sys/kern/syscalls.master,v 1.233 2007/08/16 05:26:41 davidxu Exp
*/
#ifndef _SYS_SYSPROTO_H_
@@ -1520,6 +1520,11 @@
char id_l_[PADL_(long)]; long id; char id_r_[PADR_(long)];
char sig_l_[PADL_(int)]; int sig; char sig_r_[PADR_(int)];
};
+/* Argument block for jail_set_resource_limits: jid, cpushares, memlimit. */
+struct jail_set_resource_limits_args {
+ char jid_l_[PADL_(unsigned int)]; unsigned int jid; char jid_r_[PADR_(unsigned int)];
+ char cpushares_l_[PADL_(int)]; int cpushares; char cpushares_r_[PADR_(int)];
+ char memlimit_l_[PADL_(int)]; int memlimit; char memlimit_r_[PADR_(int)];
+};
int nosys(struct thread *, struct nosys_args *);
void sys_exit(struct thread *, struct sys_exit_args *);
int fork(struct thread *, struct fork_args *);
@@ -1859,6 +1864,7 @@
int truncate(struct thread *, struct truncate_args *);
int ftruncate(struct thread *, struct ftruncate_args *);
int thr_kill2(struct thread *, struct thr_kill2_args *);
+int jail_set_resource_limits(struct thread *, struct jail_set_resource_limits_args *);
#ifdef COMPAT_43
@@ -2423,6 +2429,7 @@
#define SYS_AUE_truncate AUE_TRUNCATE
#define SYS_AUE_ftruncate AUE_FTRUNCATE
#define SYS_AUE_thr_kill2 AUE_KILL
+#define SYS_AUE_jail_set_resource_limits AUE_NULL
#undef PAD_
#undef PADL_
diff -burN src.old/sys/vm/vm_pageout.c src.new/sys/vm/vm_pageout.c
--- src.old/sys/vm/vm_pageout.c 2007-09-25 02:25:06.000000000 -0400
+++ src.new/sys/vm/vm_pageout.c 2008-05-28 13:05:44.000000000 -0400
@@ -208,7 +208,6 @@
int vm_page_max_wired; /* XXX max # of wired pages system-wide */
#if !defined(NO_SWAPPING)
-static void vm_pageout_map_deactivate_pages(vm_map_t, long);
static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long);
static void vm_req_vmdaemon(int req);
#endif
@@ -594,7 +593,7 @@
* deactivate some number of pages in a map, try to do it fairly, but
* that is really hard to do.
*/
-static void
+void
vm_pageout_map_deactivate_pages(map, desired)
vm_map_t map;
long desired;
diff -burN src.old/sys/vm/vm_pageout.h src.new/sys/vm/vm_pageout.h
--- src.old/sys/vm/vm_pageout.h 2005-01-06 21:29:27.000000000 -0500
+++ src.new/sys/vm/vm_pageout.h 2008-05-28 09:37:17.000000000 -0400
@@ -87,6 +87,8 @@
* Exported routines.
*/
+void vm_pageout_map_deactivate_pages(vm_map_t map, long desired);
+
/*
* Signal pageout-daemon and wait for it.
*/
diff -burN src.old/usr.sbin/jail/jail.8 src.new/usr.sbin/jail/jail.8
--- src.old/usr.sbin/jail/jail.8 2007-04-05 17:17:52.000000000 -0400
+++ src.new/usr.sbin/jail/jail.8 2008-05-28 19:58:58.000000000 -0400
@@ -45,6 +45,8 @@
.Op Fl J Ar jid_file
.Op Fl s Ar securelevel
.Op Fl l u Ar username | Fl U Ar username
+.Op Fl S Ar cpu_shares
+.Op Fl M Ar mem_limit
.Ar path hostname ip-number command ...
.Sh DESCRIPTION
The
@@ -88,6 +90,10 @@
The user name from jailed environment as whom the
.Ar command
should run.
+.It Fl S Ar cpu_shares
+CPU shares to assign to the prison.
+.It Fl M Ar mem_limit
+Amount of memory (in MB) to allow the prison to use.
.It Ar path
Directory which is to be the root of the prison.
.It Ar hostname
@@ -550,6 +556,17 @@
This MIB entry determines if a privileged user inside a jail will be
able to mount and unmount file system types marked as jail-friendly.
The
+.It Va security.jail.limit_jail_memory , Va security.jail.jail_pager_interval
+These MIB entries determine whether and how often (in seconds) a
+jail's memory-limit monitoring daemon will run, and consequently the
+period during which a jail can be overcommitted for resident memory.
+.It Va kern.sched.limit_jail_cpu
+This MIB entry sets whether CPU usage limits will be enforced
+against processes in jails with CPU limits.
+.It Va kern.sched.system_cpu_shares
+Number of CPU usage shares to allocate to unjailed processes for the
+purposes of determining CPU usage permitted for jailed processes.
+Unjailed processes are not subject to CPU usage limits.
.Xr lsvfs 1
command can be used to find file system types available for mount from within
a jail.
diff -burN src.old/usr.sbin/jail/jail.c src.new/usr.sbin/jail/jail.c
--- src.old/usr.sbin/jail/jail.c 2006-05-12 11:14:43.000000000 -0400
+++ src.new/usr.sbin/jail/jail.c 2008-05-28 10:02:59.000000000 -0400
@@ -56,6 +56,8 @@
struct in_addr in;
gid_t groups[NGROUPS];
int ch, i, iflag, Jflag, lflag, ngroups, securelevel, uflag, Uflag;
+ unsigned int mem_limit = 0;
+ unsigned int sched_shares = 0;
char path[PATH_MAX], *ep, *username, *JidFile;
static char *cleanenv;
const char *shell, *p = NULL;
@@ -67,7 +69,7 @@
username = JidFile = cleanenv = NULL;
fp = NULL;
- while ((ch = getopt(argc, argv, "ils:u:U:J:")) != -1) {
+ while ((ch = getopt(argc, argv, "ilS:M:s:u:U:J:")) != -1) {
switch (ch) {
case 'i':
iflag = 1;
@@ -76,6 +78,13 @@
JidFile = optarg;
Jflag = 1;
break;
+ case 'M':
+ mem_limit = atoi(optarg);
+ mem_limit *= 1024 * 1024;
+ break;
+ case 'S':
+ sched_shares = atoi(optarg);
+ break;
case 's':
ltmp = strtol(optarg, &ep, 0);
if (*ep || ep == optarg || ltmp > INT_MAX || !ltmp)
@@ -118,6 +127,8 @@
if (inet_aton(argv[2], &in) == 0)
errx(1, "Could not make sense of ip-number: %s", argv[2]);
j.ip_number = ntohl(in.s_addr);
+ j.mem_limit = mem_limit;
+ j.sched_shares = sched_shares;
if (Jflag) {
fp = fopen(JidFile, "w");
if (fp == NULL)
@@ -182,8 +193,10 @@
usage(void)
{
- (void)fprintf(stderr, "%s%s%s\n",
- "usage: jail [-i] [-J jid_file] [-s securelevel] [-l -u ",
+ (void)fprintf(stderr, "%s%s%s%s%s\n",
+ "usage: jail [-i] [-J jid_file] [-M mem_limit] ",
+ "[-S cpu_shares] [-s securelevel]",
+ " [-l -u ",
"username | -U username]",
" path hostname ip-number command ...");
exit(1);
diff -burN src.old/usr.sbin/jls/jls.8 src.new/usr.sbin/jls/jls.8
--- src.old/usr.sbin/jls/jls.8 2003-04-08 23:04:12.000000000 -0400
+++ src.new/usr.sbin/jls/jls.8 2008-05-28 10:18:45.000000000 -0400
@@ -42,7 +42,8 @@
.Sh SEE ALSO
.Xr jail 2 ,
.Xr jail 8 ,
-.Xr jexec 8
+.Xr jexec 8 ,
+.Xr jtune 8
.Sh HISTORY
The
.Nm
diff -burN src.old/usr.sbin/jtune/Makefile src.new/usr.sbin/jtune/Makefile
--- src.old/usr.sbin/jtune/Makefile 1969-12-31 19:00:00.000000000 -0500
+++ src.new/usr.sbin/jtune/Makefile 2008-05-28 03:41:05.000000000 -0400
@@ -0,0 +1,10 @@
+# $FreeBSD$
+
+# jtune only uses libc (getopt, sysctlbyname, err), so no extra
+# libraries are linked.
+PROG=	jtune
+MAN=	jtune.8
+
+WARNS?=	6
+
+.include <bsd.prog.mk>
diff -burN src.old/usr.sbin/jtune/jtune.8 src.new/usr.sbin/jtune/jtune.8
--- src.old/usr.sbin/jtune/jtune.8 1969-12-31 19:00:00.000000000 -0500
+++ src.new/usr.sbin/jtune/jtune.8 2008-05-28 10:19:33.000000000 -0400
@@ -0,0 +1,75 @@
+.\" Copyright (c) 2006 Chris Jones
+.\" All rights reserved.
+.\"
+.\" This software was developed for the FreeBSD Project by Chris Jones
+.\" thanks to the support of Google's Summer of Code program and
+.\" mentoring by Kip Macy.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd August 21, 2006
+.Dt JTUNE 8
+.Os
+.Sh NAME
+.Nm jtune
+.Nd "modify jail resource limits"
+.Sh SYNOPSIS
+.Nm
+.Fl j Ar jail_id
+.Op Fl i
+.Op Fl m Ar mem_limit
+.Op Fl s Ar cpu_shares
+.Sh DESCRIPTION
+The
+.Nm
+utility modifies a jail's memory and CPU usage limits.
+.Pp
+The options are as follows:
+.Bl -tag -width ".Fl s Ar cpu_shares"
+.It Fl j Ar jail_id
+Jail identifier (JID) of the jail whose limits are being tuned.
+.It Fl i
+Show jail's resource limits.
+.It Fl m Ar mem_limit
+Limit a jail's memory usage (resident set size) to
+.Ar mem_limit
+megabytes.
+.It Fl s Ar cpu_shares
+Set a jail's CPU shares to
+.Ar cpu_shares
+shares.
+.El
+.Sh SEE ALSO
+.Xr jail 2 ,
+.Xr jail 8 ,
+.Xr jexec 8 ,
+.Xr jls 8
+.Sh HISTORY
+The
+.Nm
+utility first appeared in
+.Fx FIXME .
+.Pp
+.Nm
+was written by Chris Jones through the 2006 Google Summer of Code
+program.
Files src.old/usr.sbin/jtune/jtune.8.gz and src.new/usr.sbin/jtune/jtune.8.gz differ
diff -burN src.old/usr.sbin/jtune/jtune.c src.new/usr.sbin/jtune/jtune.c
--- src.old/usr.sbin/jtune/jtune.c 1969-12-31 19:00:00.000000000 -0500
+++ src.new/usr.sbin/jtune/jtune.c 2008-05-28 03:39:15.000000000 -0400
@@ -0,0 +1,188 @@
+/*-
+ * Copyright (c) 2006 Chris Jones
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Chris Jones
+ * thanks to the support of Google's Summer of Code program and
+ * mentoring by Kip Macy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD");
+
+#include <sys/param.h>
+#include <sys/jail.h>
+#include <sys/sysctl.h>
+
+#include <err.h>
+#include <errno.h>
+#include <grp.h>
+#include <limits.h>
+#include <login_cap.h>
+#include <paths.h>
+#include <pwd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+static void usage(void);
+static struct xprison *getxprison(int);
+extern char **environ;
+
+int
+main(int argc, char **argv)
+{
+ struct xprison *xp;
+ int jid = 0;
+ int memlimit = -1;
+ int shares = -1;
+ int iflag = 0;
+ int retval;
+ int ch;
+
+ while ((ch = getopt(argc, argv, "ij:m:s:")) != -1) {
+ switch (ch) {
+ case 'i':
+ iflag = 1;
+ break;
+ case 'j':
+ jid = atoi(optarg);
+ if (!jid && errno)
+ err(1, "invalid jail id '%s'", optarg);
+ break;
+
+ case 'm':
+ memlimit = atoi(optarg);
+ if (!memlimit && errno)
+ err(1, "invalid memory limit '%s'", optarg);
+ if (memlimit < 0)
+ errx(1, "invalid memory limit '%s'", optarg);
+ memlimit *= 1024 * 1024;
+ break;
+
+ case 's':
+ shares = atoi(optarg);
+ if (!shares && errno)
+ err(1, "invalid cpu share '%s'", optarg);
+ if (shares < 0)
+ errx(1, "invalid cpu share '%s'", optarg);
+ break;
+
+ default:
+ usage();
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (!jid)
+ usage();
+
+ xp = getxprison(jid);
+ if (NULL == xp)
+ errx(1, "no jail with id %d", jid);
+
+ if (iflag) {
+ char *memlimstr, *memusestr;
+
+ asprintf(&memusestr, "%d M",
+ xp->pr_mem_usage / (1024 * 1024));
+ if (xp->pr_mem_limit) {
+ asprintf(&memlimstr, "%d M",
+ xp->pr_mem_limit / (1024 * 1024));
+ } else {
+ asprintf(&memlimstr, "None");
+ }
+
+ if (NULL == memusestr || NULL == memlimstr)
+ err(1, "couldn't allocate memory");
+
+ printf(" JID Hostname Memory Used / Limit CPU Shares\n");
+ printf("%6d %-24.24s %6s / %-6.6s %-4d\n",
+ xp->pr_id, xp->pr_host,
+ memusestr, memlimstr,
+ xp->pr_sched_shares);
+ exit(0);
+ }
+
+ retval = jail_set_resource_limits(jid, shares, memlimit);
+ if (retval) {
+ errx(1, "jail_set_resource_limit(%d, %d, %d) failed",
+ jid, memlimit, shares);
+ }
+ exit(0);
+
+}
+
+/* Print the command synopsis to stderr and exit with a failure status. */
+static void
+usage(void)
+{
+	(void)fprintf(stderr, "%s\n",
+	    "usage: jtune -j jail_id [-i] [-m mem_limit] [-s cpu_shares]");
+	exit(1);
+}
+
+/*
+ * Look up jail `jid` in the security.jail.list sysctl.
+ *
+ * Returns a malloc()ed copy of the matching xprison record, which the
+ * caller frees, or NULL when no jail has that id.  Exits with a
+ * diagnostic on sysctl or allocation failure, when the list is empty,
+ * or when the kernel's record layout does not match XPRISON_VERSION.
+ */
+static struct xprison *
+getxprison(int jid)
+{
+	struct xprison *list, *found;
+	size_t count, i, len;
+	int tries;
+
+	/*
+	 * The list can grow between the size probe and the fetch, in which
+	 * case the fetch fails with ENOMEM; re-probe and retry a few times.
+	 */
+	for (tries = 0; ; tries++) {
+		if (sysctlbyname("security.jail.list", NULL, &len, NULL, 0) == -1)
+			err(1, "sysctlbyname(): security.jail.list");
+
+		if (len == 0)
+			errx(1, "sysctl security.jail.list has no entries for jid %d", jid);
+
+		list = malloc(len);
+		if (NULL == list)
+			err(1, "malloc()");
+
+		if (sysctlbyname("security.jail.list", list, &len, NULL, 0) == 0)
+			break;
+
+		/* Report before free() so errno is not clobbered. */
+		if (errno != ENOMEM || tries >= 3)
+			err(1, "sysctlbyname(): security.jail.list");
+		free(list);
+	}
+
+	if (len < sizeof(*list) || len % sizeof(*list) ||
+	    list->pr_version != XPRISON_VERSION)
+		errx(1, "Kernel and userland out of sync");
+
+	found = NULL;
+	count = len / sizeof(*list);
+	for (i = 0; i < count; i++) {
+		if (jid != list[i].pr_id)
+			continue;
+		found = malloc(sizeof(*found));
+		if (NULL == found)
+			err(1, "malloc()");
+		memcpy(found, &list[i], sizeof(*found));
+		break;
+	}
+
+	free(list);
+	return (found);
+}
_______________________________________________
freebsd-jail@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-jail
To unsubscribe, send any mail to "[EMAIL PROTECTED]"