ola, this semester i've had the fun of setting and marking the assignments for the operating systems course taught at the university of queensland (where i work).
the second assignment was creating extremely basic process containers using solaris zones as a model. the spec is at http://www.uq.id.au/dlg/comp3301/assignment2.pdf. the diff below is my solution to it. to be clear, i am not asking for oks on this and definitely do not want to see it going into the tree in its current state. it does very simplistic and very incomplete process isolation, but does not isolate any other aspect of the operating system and therefore cannot be considered a useful containment infrastructure. however, i found it interesting to get my head around this aspect of the system, and i figured other people (such as this year's comp3301 students) would be interested too. i also felt sad i couldn't find kristaps' mult code anywhere, so i wanted this to be backed up by everyone for posterity. this should also help explain why i've been slacking these last few months. i'm happy to explain the implementation if people are interested. dlg Index: bin/ps/extern.h =================================================================== RCS file: /cvs/src/bin/ps/extern.h,v retrieving revision 1.17 diff -u -p -r1.17 extern.h --- bin/ps/extern.h 29 Jun 2015 15:03:33 -0000 1.17 +++ bin/ps/extern.h 30 Oct 2015 02:52:16 -0000 @@ -61,6 +61,7 @@ void nlisterr(struct nlist *); void p_rssize(const struct kinfo_proc *, VARENT *); void pagein(const struct kinfo_proc *, VARENT *); void parsefmt(char *); +void zonefmt(void); void pcpu(const struct kinfo_proc *, VARENT *); void pmem(const struct kinfo_proc *, VARENT *); void pri(const struct kinfo_proc *, VARENT *); @@ -83,4 +84,5 @@ void curwd(const struct kinfo_proc *, V void euname(const struct kinfo_proc *, VARENT *); void vsize(const struct kinfo_proc *, VARENT *); void wchan(const struct kinfo_proc *, VARENT *); +void zvar(const struct kinfo_proc *, VARENT *); __END_DECLS Index: bin/ps/keyword.c =================================================================== RCS file: /cvs/src/bin/ps/keyword.c,v retrieving 
revision 1.42 diff -u -p -r1.42 keyword.c --- bin/ps/keyword.c 16 Jan 2015 06:39:32 -0000 1.42 +++ bin/ps/keyword.c 30 Oct 2015 02:52:16 -0000 @@ -187,6 +187,7 @@ VAR var[] = { {"vsz", "VSZ", NULL, 0, vsize, 5}, {"wchan", "WCHAN", NULL, LJUST, wchan, KI_WMESGLEN - 1}, {"xstat", "XSTAT", NULL, 0, pvar, 4, 0, POFF(p_xstat), UINT16, "x"}, + {"zone", "ZONE", NULL, 0, zvar, 8, 0, POFF(p_zoneid)}, {""}, }; @@ -243,6 +244,20 @@ parsefmt(char *p) } if (!vhead) errx(1, "no valid keywords"); +} + +void +zonefmt(void) +{ + struct varent *vent; + + vent = malloc(sizeof(*vent)); + if (vent == NULL) + err(1, "zone fmt malloc"); + + vent->var = findvar("zone"); + vent->next = vhead; + vhead = vent; } static VAR * Index: bin/ps/print.c =================================================================== RCS file: /cvs/src/bin/ps/print.c,v retrieving revision 1.64 diff -u -p -r1.64 print.c --- bin/ps/print.c 25 Oct 2015 15:26:53 -0000 1.64 +++ bin/ps/print.c 30 Oct 2015 02:52:16 -0000 @@ -47,6 +47,7 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <zones.h> #include <limits.h> #include <pwd.h> @@ -676,6 +677,25 @@ pvar(const struct kinfo_proc *kp, VARENT (void)printf("%*s", v->width, "-"); else printval((char *)kp + v->off, v); +} + +int zone_name(zoneid_t, char *, size_t); + +void +zvar(const struct kinfo_proc *kp, VARENT *ve) +{ + char zonename[MAXZONENAMELEN]; + VAR *v = ve->var; + int width; + + if (zone_name(kp->p_zoneid, zonename, sizeof(zonename)) == -1) + err(1, "zone_name"); + + if (strlen(zonename) > v->width) { + width = v->width - 1; + (void)printf("%*.*s*", width, width, zonename); + } else + (void)printf("%*s", (int)v->width, zonename); } void Index: bin/ps/ps.1 =================================================================== RCS file: /cvs/src/bin/ps/ps.1,v retrieving revision 1.102 diff -u -p -r1.102 ps.1 --- bin/ps/ps.1 22 Oct 2015 22:21:41 -0000 1.102 +++ bin/ps/ps.1 30 Oct 2015 02:52:16 -0000 @@ -39,7 +39,7 @@ .Sh SYNOPSIS .Nm ps .Sm off 
-.Op Fl AaceHhjkLlmrSTuvwx +.Op Fl AaceHhjkLlmrSTuvwxZ .Sm on .Op Fl M Ar core .Op Fl N Ar system @@ -49,6 +49,7 @@ .Op Fl t Ar tty .Op Fl U Ar username .Op Fl W Ar swap +.Op Fl z Ar zone .Sh DESCRIPTION The .Nm @@ -173,6 +174,10 @@ option is specified more than once, will use as many columns as necessary without regard for window size. .It Fl x Display information about processes without controlling terminals. +.It Fl Z +Prepends the displayed information with the zone field. +.It Fl z Ar zone +Display information about processes running in the specified zone. .El .Sh KEYWORDS The following is a complete list of the available keywords Index: bin/ps/ps.c =================================================================== RCS file: /cvs/src/bin/ps/ps.c,v retrieving revision 1.67 diff -u -p -r1.67 ps.c --- bin/ps/ps.c 25 Oct 2015 09:39:00 -0000 1.67 +++ bin/ps/ps.c 30 Oct 2015 02:52:16 -0000 @@ -52,6 +52,7 @@ #include <string.h> #include <unistd.h> #include <limits.h> +#include <zones.h> #include "ps.h" @@ -69,6 +70,7 @@ int needcomm, needenv, neednlist, comman enum sort { DEFAULT, SORTMEM, SORTCPU } sortby = DEFAULT; static char *kludge_oldps_options(char *); +static zoneid_t zone_lookup(const char *); static int pscomp(const void *, const void *); static void scanvars(void); static void usage(void); @@ -98,6 +100,9 @@ main(int argc, char *argv[]) int all, ch, flag, i, fmt, lineno, nentries; int prtheader, showthreads, wflag, kflag, what, Uflag, xflg; char *nlistf, *memf, *swapf, *cols, errbuf[_POSIX2_LINE_MAX]; + int Zflag = 0; + const char *zone = NULL; + zoneid_t zoneid = -1; if ((cols = getenv("COLUMNS")) != NULL && *cols != '\0') { const char *errstr; @@ -125,7 +130,7 @@ main(int argc, char *argv[]) ttydev = NODEV; memf = nlistf = swapf = NULL; while ((ch = getopt(argc, argv, - "AaCcegHhjkLlM:mN:O:o:p:rSTt:U:uvW:wx")) != -1) + "AaCcegHhjkLlM:mN:O:o:p:rSTt:U:uvW:wxZz:")) != -1) switch (ch) { case 'A': all = 1; @@ -251,6 +256,12 @@ main(int argc, char *argv[]) 
case 'x': xflg = 1; break; + case 'Z': + Zflag = 1; + break; + case 'z': + zone = optarg; + break; default: usage(); } @@ -287,6 +298,8 @@ main(int argc, char *argv[]) else parsefmt(dfmt); } + if (Zflag) + zonefmt(); /* XXX - should be cleaner */ if (!all && ttydev == NODEV && pid == -1 && !Uflag) { @@ -325,6 +338,9 @@ main(int argc, char *argv[]) if (showthreads) what |= KERN_PROC_SHOW_THREADS; + if (zone != NULL) + zoneid = zone_lookup(zone); + /* * select procs */ @@ -351,6 +367,8 @@ main(int argc, char *argv[]) * for each proc, call each variable output function. */ for (i = lineno = 0; i < nentries; i++) { + if (zoneid != -1 && zoneid != kinfo[i]->p_zoneid) + continue; if (showthreads == 0 && (kinfo[i]->p_flag & P_THREAD) != 0) continue; if (xflg == 0 && ((int)kinfo[i]->p_tdev == NODEV || @@ -373,6 +391,50 @@ main(int argc, char *argv[]) exit(eval); } +static zoneid_t +zone_lookup(const char *zone) +{ + char zonename[MAXZONENAMELEN]; + const char *errstr; + zoneid_t *zs = NULL; + size_t nzs, i = 8; + zoneid_t z; + + for (;;) { + nzs = i; + + zs = reallocarray(zs, nzs, sizeof(*zs)); + if (zs == NULL) + err(1, "zone lookup"); + + if (zone_list(zs, &nzs) == 0) + break; + + if (errno != EFAULT) + err(1, "zone list"); + + i <<= 1; + } + + for (i = 0; i < nzs; i++) { + z = zs[i]; + if (zone_name(z, zonename, sizeof(zonename)) == -1) + err(1, "zone name"); + if (strcmp(zone, zonename) == 0) { + free(zs); + return (z); + } + } + + free(zs); + + z = strtonum(zone, 0, 1024, &errstr); + if (errstr != NULL) + errx(1, "zone id: %s", errstr); + + return (z); +} + static void scanvars(void) { @@ -478,7 +540,7 @@ static void usage(void) { (void)fprintf(stderr, - "usage: %s [-AaceHhjkLlmrSTuvwx] [-M core] [-N system] [-O fmt] [-o fmt] [-p pid]\n", + "usage: %s [-AaceHhjkLlmrSTuvwxZ] [-M core] [-N system] [-O fmt] [-o fmt] [-p pid]\n", __progname); (void)fprintf(stderr, "%-*s[-t tty] [-U username] [-W swap]\n", (int)strlen(__progname) + 8, ""); Index: include/Makefile 
=================================================================== RCS file: /cvs/src/include/Makefile,v retrieving revision 1.206 diff -u -p -r1.206 Makefile --- include/Makefile 27 Sep 2015 14:52:47 -0000 1.206 +++ include/Makefile 30 Oct 2015 02:52:20 -0000 @@ -20,7 +20,7 @@ FILES= a.out.h ar.h asr.h assert.h bitst sndio.h \ spawn.h stdbool.h stddef.h stdio.h stdlib.h string.h strings.h struct.h \ sysexits.h tar.h tgmath.h time.h ttyent.h unistd.h utime.h \ - utmp.h uuid.h vis.h wchar.h wctype.h + utmp.h uuid.h vis.h wchar.h wctype.h zones.h FILES+= link.h link_elf.h Index: include/zones.h =================================================================== RCS file: include/zones.h diff -N include/zones.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ include/zones.h 30 Oct 2015 02:52:20 -0000 @@ -0,0 +1,32 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2015 David Gwynne <[email protected]> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef _ZONES_H_ +#define _ZONES_H_ + +#include <sys/zones.h> + +__BEGIN_DECLS +zoneid_t zone_create(const char *); +int zone_destroy(zoneid_t); +int zone_enter(zoneid_t); +int zone_list(zoneid_t *, size_t *); +int zone_name(zoneid_t, char *, size_t); +__END_DECLS + +#endif /* !_ZONES_H_ */ Index: lib/libc/Symbols.list =================================================================== RCS file: /cvs/src/lib/libc/Symbols.list,v retrieving revision 1.31 diff -u -p -r1.31 Symbols.list --- lib/libc/Symbols.list 25 Oct 2015 18:01:24 -0000 1.31 +++ lib/libc/Symbols.list 30 Oct 2015 02:52:20 -0000 @@ -1676,3 +1676,9 @@ yperr_string ypprot_err _yp_check /* chpass, passwd, login_chpass */ yp_unbind /* passwd */ + +zone_create +zone_destroy +zone_enter +zone_list +zone_name Index: lib/libc/shlib_version =================================================================== RCS file: /cvs/src/lib/libc/shlib_version,v retrieving revision 1.177 diff -u -p -r1.177 shlib_version --- lib/libc/shlib_version 23 Oct 2015 00:53:13 -0000 1.177 +++ lib/libc/shlib_version 30 Oct 2015 02:52:20 -0000 @@ -1,4 +1,9 @@ major=84 minor=1 # note: If changes were made to include/thread_private.h or if system # calls were added/changed then librthread/shlib_version also be updated. 
Index: lib/libc/sys/Makefile.inc =================================================================== RCS file: /cvs/src/lib/libc/sys/Makefile.inc,v retrieving revision 1.135 diff -u -p -r1.135 Makefile.inc --- lib/libc/sys/Makefile.inc 23 Oct 2015 04:39:24 -0000 1.135 +++ lib/libc/sys/Makefile.inc 30 Oct 2015 02:52:20 -0000 @@ -58,7 +58,8 @@ ASM= __get_tcb.o __getcwd.o __semctl.o _ shmget.o shutdown.o sigaltstack.o socket.o \ socketpair.o stat.o statfs.o swapctl.o symlink.o symlinkat.o \ sync.o sysarch.o sysctl.o umask.o unlink.o unlinkat.o \ - unmount.o utimensat.o utimes.o utrace.o wait4.o write.o writev.o + unmount.o utimensat.o utimes.o utrace.o wait4.o write.o writev.o \ + zone_create.o zone_destroy.o zone_enter.o zone_list.o zone_name.o SRCS+= ${SRCS_${MACHINE_CPU}} .for i in ${SRCS_${MACHINE_CPU}} Index: sys/arch/sparc64/sparc64/db_trace.c =================================================================== RCS file: /cvs/src/sys/arch/sparc64/sparc64/db_trace.c,v retrieving revision 1.10 diff -u -p -r1.10 db_trace.c --- sys/arch/sparc64/sparc64/db_trace.c 9 Feb 2015 09:21:30 -0000 1.10 +++ sys/arch/sparc64/sparc64/db_trace.c 30 Oct 2015 02:52:22 -0000 @@ -82,7 +82,7 @@ db_stack_trace_print(addr, have_addr, co struct proc *p; struct user *u; (*pr)("trace: pid %d ", (int)addr); - p = pfind(addr); + p = pfind(&process0, addr); if (p == NULL) { (*pr)("not found\n"); return; Index: sys/conf/files =================================================================== RCS file: /cvs/src/sys/conf/files,v retrieving revision 1.605 diff -u -p -r1.605 files --- sys/conf/files 24 Oct 2015 10:52:05 -0000 1.605 +++ sys/conf/files 30 Oct 2015 02:52:22 -0000 @@ -698,6 +698,7 @@ file kern/sys_generic.c file kern/sys_pipe.c file kern/sys_process.c ptrace | systrace file kern/sys_socket.c +file kern/sys_zones.c file kern/syscalls.c syscall_debug file kern/sysv_ipc.c sysvshm | sysvsem | sysvmsg file kern/sysv_msg.c sysvmsg Index: sys/dev/systrace.c 
=================================================================== RCS file: /cvs/src/sys/dev/systrace.c,v retrieving revision 1.77 diff -u -p -r1.77 systrace.c --- sys/dev/systrace.c 8 Sep 2015 11:58:58 -0000 1.77 +++ sys/dev/systrace.c 30 Oct 2015 02:52:22 -0000 @@ -539,7 +539,7 @@ systrace_find(struct str_process *strp) { struct proc *proc; - if ((proc = pfind(strp->pid)) == NULL) + if ((proc = pfind(&process0, strp->pid)) == NULL) return (NULL); if (proc != strp->proc) @@ -1192,7 +1192,8 @@ systrace_attach(struct fsystrace *fst, p struct process *tr; /* target process */ struct str_process *newstrp; - if ((t = pfind(pid)) == NULL || (t->p_flag & P_THREAD)) { + if ((t = pfind(p->p_p, pid)) == NULL || + (t->p_flag & P_THREAD)) { error = ESRCH; goto out; } Index: sys/kern/init_main.c =================================================================== RCS file: /cvs/src/sys/kern/init_main.c,v retrieving revision 1.245 diff -u -p -r1.245 init_main.c --- sys/kern/init_main.c 7 Oct 2015 10:50:35 -0000 1.245 +++ sys/kern/init_main.c 30 Oct 2015 02:52:22 -0000 @@ -47,6 +47,7 @@ #include <sys/kthread.h> #include <sys/mount.h> #include <sys/proc.h> +#include <sys/zones.h> #include <sys/resourcevar.h> #include <sys/signalvar.h> #include <sys/systm.h> @@ -148,6 +149,7 @@ void init_exec(void); void kqueue_init(void); void taskq_init(void); void pool_gc_pages(void *); +void zone_boot(void); extern char sigcode[], esigcode[]; #ifdef SYSCALL_DEBUG @@ -266,6 +268,11 @@ main(void *framep) p->p_ucred->cr_ngroups = 1; /* group 0 */ /* + * Initialize zone subsystem. + */ + zone_boot(); + + /* * Create process 0 (the swapper). */ pr = &process0; @@ -276,6 +283,7 @@ main(void *framep) /* Set the default routing table/domain. 
*/ process0.ps_rtableid = 0; + process0.ps_zone = zone_ref(global_zone); LIST_INSERT_HEAD(&allproc, p, p_list); pr->ps_pgrp = &pgrp0; Index: sys/kern/init_sysent.c =================================================================== RCS file: /cvs/src/sys/kern/init_sysent.c,v retrieving revision 1.177 diff -u -p -r1.177 init_sysent.c --- sys/kern/init_sysent.c 28 Oct 2015 12:04:06 -0000 1.177 +++ sys/kern/init_sysent.c 30 Oct 2015 02:52:22 -0000 @@ -1,4 +1,8 @@ /* $OpenBSD: init_sysent.c,v 1.177 2015/10/28 12:04:06 deraadt Exp $ */ /* * System call switch table. @@ -751,5 +755,15 @@ struct sysent sysent[] = { sys___set_tcb }, /* 329 = __set_tcb */ { 0, 0, SY_NOLOCK | 0, sys___get_tcb }, /* 330 = __get_tcb */ + { 1, s(struct sys_zone_create_args), SY_NOLOCK | 0, + sys_zone_create }, /* 331 = zone_create */ + { 1, s(struct sys_zone_destroy_args), SY_NOLOCK | 0, + sys_zone_destroy }, /* 332 = zone_destroy */ + { 1, s(struct sys_zone_enter_args), SY_NOLOCK | 0, + sys_zone_enter }, /* 333 = zone_enter */ + { 2, s(struct sys_zone_list_args), SY_NOLOCK | 0, + sys_zone_list }, /* 334 = zone_list */ + { 3, s(struct sys_zone_name_args), SY_NOLOCK | 0, + sys_zone_name }, /* 335 = zone_name */ }; Index: sys/kern/kern_descrip.c =================================================================== RCS file: /cvs/src/sys/kern/kern_descrip.c,v retrieving revision 1.123 diff -u -p -r1.123 kern_descrip.c --- sys/kern/kern_descrip.c 28 Oct 2015 12:17:20 -0000 1.123 +++ sys/kern/kern_descrip.c 30 Oct 2015 02:52:22 -0000 @@ -460,7 +460,8 @@ restart: if ((long)SCARG(uap, arg) <= 0) { SCARG(uap, arg) = (void *)(-(long)SCARG(uap, arg)); } else { - struct process *pr1 = prfind((long)SCARG(uap, arg)); + struct process *pr1 = prfind(p->p_p, + (long)SCARG(uap, arg)); if (pr1 == 0) { error = ESRCH; break; Index: sys/kern/kern_event.c =================================================================== RCS file: /cvs/src/sys/kern/kern_event.c,v retrieving revision 1.66 diff -u -p -r1.66 
kern_event.c --- sys/kern/kern_event.c 29 Oct 2015 13:20:44 -0000 1.66 +++ sys/kern/kern_event.c 30 Oct 2015 02:52:22 -0000 @@ -212,7 +212,7 @@ filt_procattach(struct knote *kn) { struct process *pr; - pr = prfind(kn->kn_id); + pr = prfind(curproc->p_p, kn->kn_id); if (pr == NULL) return (ESRCH); Index: sys/kern/kern_exit.c =================================================================== RCS file: /cvs/src/sys/kern/kern_exit.c,v retrieving revision 1.154 diff -u -p -r1.154 kern_exit.c --- sys/kern/kern_exit.c 9 Oct 2015 01:10:27 -0000 1.154 +++ sys/kern/kern_exit.c 30 Oct 2015 02:52:22 -0000 @@ -41,6 +41,7 @@ #include <sys/systm.h> #include <sys/ioctl.h> #include <sys/proc.h> +#include <sys/zones.h> #include <sys/tty.h> #include <sys/time.h> #include <sys/resource.h> @@ -600,7 +601,7 @@ proc_finish_wait(struct proc *waiter, st * we need to give it back to the old parent. */ pr = p->p_p; - if (pr->ps_oppid && (tr = prfind(pr->ps_oppid))) { + if (pr->ps_oppid && (tr = prfind(&process0, pr->ps_oppid))) { atomic_clearbits_int(&pr->ps_flags, PS_TRACED); pr->ps_oppid = 0; proc_reparent(pr, tr); @@ -660,6 +661,7 @@ process_zap(struct process *pr) if (otvp) vrele(otvp); + zone_unref(pr->ps_zone); KASSERT(pr->ps_refcnt == 1); if (pr->ps_ptstat != NULL) free(pr->ps_ptstat, M_SUBPROC, sizeof(*pr->ps_ptstat)); Index: sys/kern/kern_fork.c =================================================================== RCS file: /cvs/src/sys/kern/kern_fork.c,v retrieving revision 1.184 diff -u -p -r1.184 kern_fork.c --- sys/kern/kern_fork.c 9 Oct 2015 01:10:27 -0000 1.184 +++ sys/kern/kern_fork.c 30 Oct 2015 02:52:22 -0000 @@ -44,6 +44,7 @@ #include <sys/malloc.h> #include <sys/mount.h> #include <sys/proc.h> +#include <sys/zones.h> #include <sys/exec.h> #include <sys/resourcevar.h> #include <sys/signalvar.h> @@ -225,6 +226,8 @@ process_new(struct proc *p, struct proce else pr->ps_vmspace = uvmspace_fork(parent); + pr->ps_zone = zone_ref(parent->ps_zone); + if (pr->ps_pledgepaths) 
pr->ps_pledgepaths->wl_ref++; @@ -574,7 +577,7 @@ ispidtaken(pid_t pid) if (pid == oldpids[i]) return (1); - if (pfind(pid) != NULL) + if (pfind(&process0, pid) != NULL) return (1); if (pgfind(pid) != NULL) return (1); Index: sys/kern/kern_ktrace.c =================================================================== RCS file: /cvs/src/sys/kern/kern_ktrace.c,v retrieving revision 1.82 diff -u -p -r1.82 kern_ktrace.c --- sys/kern/kern_ktrace.c 28 Oct 2015 11:16:23 -0000 1.82 +++ sys/kern/kern_ktrace.c 30 Oct 2015 02:52:22 -0000 @@ -508,7 +508,7 @@ sys_ktrace(struct proc *p, void *v, regi /* * by pid */ - pr = prfind(SCARG(uap, pid)); + pr = prfind(p->p_p, SCARG(uap, pid)); if (pr == NULL) { error = ESRCH; goto done; Index: sys/kern/kern_malloc.c =================================================================== RCS file: /cvs/src/sys/kern/kern_malloc.c,v retrieving revision 1.128 diff -u -p -r1.128 kern_malloc.c --- sys/kern/kern_malloc.c 14 Mar 2015 03:38:50 -0000 1.128 +++ sys/kern/kern_malloc.c 30 Oct 2015 02:52:22 -0000 @@ -144,8 +144,8 @@ struct timeval malloc_lasterr; /* * Allocate a block of memory */ -void * -malloc(size_t size, int type, int flags) +static inline void * +_malloc(size_t size, int type, int flags) { struct kmembuckets *kbp; struct kmemusage *kup; @@ -348,11 +348,23 @@ out: return (va); } +void * +malloc(size_t size, int type, int flags) +{ + void *v; + + KERNEL_LOCK(); + v = _malloc(size, type, flags); + KERNEL_UNLOCK(); + + return (v); +} + /* * Free a block of memory allocated by malloc. 
*/ -void -free(void *addr, int type, size_t freedsize) +static inline void +_free(void *addr, int type, size_t freedsize) { struct kmembuckets *kbp; struct kmemusage *kup; @@ -463,6 +475,14 @@ free(void *addr, int type, size_t freeds #endif XSIMPLEQ_INSERT_TAIL(&kbp->kb_freelist, freep, kf_flist); splx(s); +} + +void +free(void *addr, int type, size_t freedsize) +{ + KERNEL_LOCK(); + _free(addr, type, freedsize); + KERNEL_UNLOCK(); } /* Index: sys/kern/kern_proc.c =================================================================== RCS file: /cvs/src/sys/kern/kern_proc.c,v retrieving revision 1.65 diff -u -p -r1.65 kern_proc.c --- sys/kern/kern_proc.c 11 Sep 2015 08:27:39 -0000 1.65 +++ sys/kern/kern_proc.c 30 Oct 2015 02:52:22 -0000 @@ -36,6 +36,7 @@ #include <sys/systm.h> #include <sys/kernel.h> #include <sys/proc.h> +#include <sys/zones.h> #include <sys/buf.h> #include <sys/acct.h> #include <sys/wait.h> @@ -166,13 +167,13 @@ inferior(struct process *pr, struct proc * Locate a proc (thread) by number */ struct proc * -pfind(pid_t pid) +pfind(struct process *self, pid_t pid) { struct proc *p; LIST_FOREACH(p, PIDHASH(pid), p_hash) if (p->p_pid == pid) - return (p); + return (zone_visible(self, p->p_p) ? p : NULL); return (NULL); } @@ -180,13 +181,14 @@ pfind(pid_t pid) * Locate a process by number */ struct process * -prfind(pid_t pid) +prfind(struct process *self, pid_t pid) { struct proc *p; LIST_FOREACH(p, PIDHASH(pid), p_hash) if (p->p_pid == pid) - return (p->p_flag & P_THREAD ? NULL : p->p_p); + return (!zone_visible(self, p->p_p) || + p->p_flag & P_THREAD ? 
NULL : p->p_p); return (NULL); } Index: sys/kern/kern_prot.c =================================================================== RCS file: /cvs/src/sys/kern/kern_prot.c,v retrieving revision 1.63 diff -u -p -r1.63 kern_prot.c --- sys/kern/kern_prot.c 2 Mar 2015 20:46:50 -0000 1.63 +++ sys/kern/kern_prot.c 30 Oct 2015 02:52:22 -0000 @@ -112,7 +112,7 @@ sys_getpgid(struct proc *curp, void *v, if (SCARG(uap, pid) == 0 || SCARG(uap, pid) == targpr->ps_pid) goto found; - if ((targpr = prfind(SCARG(uap, pid))) == NULL) + if ((targpr = prfind(curp->p_p, SCARG(uap, pid))) == NULL) return (ESRCH); if (targpr->ps_session != curp->p_p->ps_session) return (EPERM); @@ -131,7 +131,7 @@ sys_getsid(struct proc *curp, void *v, r if (SCARG(uap, pid) == 0 || SCARG(uap, pid) == targpr->ps_pid) goto found; - if ((targpr = prfind(SCARG(uap, pid))) == NULL) + if ((targpr = prfind(curp->p_p, SCARG(uap, pid))) == NULL) return (ESRCH); if (targpr->ps_session != curp->p_p->ps_session) return (EPERM); @@ -273,7 +273,8 @@ sys_setpgid(struct proc *curp, void *v, newpgrp = pool_get(&pgrp_pool, PR_WAITOK); if (pid != 0 && pid != curpr->ps_pid) { - if ((targpr = prfind(pid)) == 0 || !inferior(targpr, curpr)) { + if ((targpr = prfind(curp->p_p, pid)) == 0 || + !inferior(targpr, curpr)) { error = ESRCH; goto out; } Index: sys/kern/kern_resource.c =================================================================== RCS file: /cvs/src/sys/kern/kern_resource.c,v retrieving revision 1.54 diff -u -p -r1.54 kern_resource.c --- sys/kern/kern_resource.c 9 Feb 2015 09:39:09 -0000 1.54 +++ sys/kern/kern_resource.c 30 Oct 2015 02:52:22 -0000 @@ -80,7 +80,7 @@ sys_getpriority(struct proc *curp, void if (SCARG(uap, who) == 0) pr = curp->p_p; else - pr = prfind(SCARG(uap, who)); + pr = prfind(curp->p_p, SCARG(uap, who)); if (pr == NULL) break; if (pr->ps_nice < low) @@ -136,7 +136,7 @@ sys_setpriority(struct proc *curp, void if (SCARG(uap, who) == 0) pr = curp->p_p; else - pr = prfind(SCARG(uap, who)); + pr = 
prfind(curp->p_p, SCARG(uap, who)); if (pr == NULL) break; error = donice(curp, pr, SCARG(uap, prio)); Index: sys/kern/kern_sig.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sig.c,v retrieving revision 1.187 diff -u -p -r1.187 kern_sig.c --- sys/kern/kern_sig.c 25 Oct 2015 20:39:54 -0000 1.187 +++ sys/kern/kern_sig.c 30 Oct 2015 02:52:22 -0000 @@ -100,13 +100,16 @@ cansignal(struct proc *p, struct process if (pr == qr) return (1); /* process can always signal itself */ + if (signum == SIGCONT && qr->ps_session == pr->ps_session) + return (1); /* SIGCONT in session */ + + if (pr->ps_zone != qr->ps_zone) + return (0); /* fail non-root in the gz */ + /* optimization: if the same creds then the tests below will pass */ if (uc == quc) return (1); - if (signum == SIGCONT && qr->ps_session == pr->ps_session) - return (1); /* SIGCONT in session */ - /* * Using kill(), only certain signals can be sent to setugid * child processes @@ -591,14 +594,15 @@ sys_kill(struct proc *cp, void *v, regis * be a main thread. 
*/ if (pid > THREAD_PID_OFFSET) { - if ((p = pfind(pid - THREAD_PID_OFFSET)) == NULL) + if ((p = pfind(cp->p_p, + pid - THREAD_PID_OFFSET)) == NULL) return (ESRCH); if (p->p_p != cp->p_p) return (ESRCH); type = STHREAD; } else { /* XXX use prfind() */ - if ((p = pfind(pid)) == NULL) + if ((p = pfind(cp->p_p, pid)) == NULL) return (ESRCH); if (p->p_flag & P_THREAD) return (ESRCH); @@ -698,7 +702,7 @@ csignal(pid_t pgid, int signum, uid_t ui if (CANDELIVER(uid, euid, pr)) prsignal(pr, signum); } else { - if ((pr = prfind(pgid)) == NULL) + if ((pr = prfind(&process0, pgid)) == NULL) return; if (CANDELIVER(uid, euid, pr)) prsignal(pr, signum); Index: sys/kern/kern_sysctl.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sysctl.c,v retrieving revision 1.297 diff -u -p -r1.297 kern_sysctl.c --- sys/kern/kern_sysctl.c 25 Oct 2015 20:39:54 -0000 1.297 +++ sys/kern/kern_sysctl.c 30 Oct 2015 02:52:22 -0000 @@ -45,6 +45,7 @@ #include <sys/malloc.h> #include <sys/pool.h> #include <sys/proc.h> +#include <sys/zones.h> #include <sys/resourcevar.h> #include <sys/signalvar.h> #include <sys/file.h> @@ -372,7 +373,7 @@ kern_sysctl(int *name, u_int namelen, vo } #ifndef SMALL_KERNEL case KERN_PROC: - return (sysctl_doproc(name + 1, namelen - 1, oldp, oldlenp)); + return (sysctl_doproc(name + 1, namelen - 1, oldp, oldlenp, p)); case KERN_PROC_ARGS: return (sysctl_proc_args(name + 1, namelen - 1, oldp, oldlenp, p)); @@ -1410,7 +1411,8 @@ sysctl_file(int *name, u_int namelen, ch #define KERN_PROCSLOP 5 int -sysctl_doproc(int *name, u_int namelen, char *where, size_t *sizep) +sysctl_doproc(int *name, u_int namelen, char *where, size_t *sizep, + struct proc *cp) { struct kinfo_proc *kproc = NULL; struct proc *p; @@ -1445,6 +1447,9 @@ sysctl_doproc(int *name, u_int namelen, doingzomb = 0; again: for (; pr != NULL; pr = LIST_NEXT(pr, ps_list)) { + if (!zone_visible(cp->p_p, pr)) + continue; + /* XXX skip processes in the middle of being 
zapped */ if (pr->ps_pgrp == NULL) continue; @@ -1667,7 +1672,7 @@ sysctl_proc_args(int *name, u_int namele return (EOPNOTSUPP); } - if ((vpr = prfind(pid)) == NULL) + if ((vpr = prfind(cp->p_p, pid)) == NULL) return (ESRCH); if (oldp == NULL) { @@ -1858,7 +1863,7 @@ sysctl_proc_cwd(int *name, u_int namelen return (EINVAL); pid = name[0]; - if ((findpr = prfind(pid)) == NULL) + if ((findpr = prfind(cp->p_p, pid)) == NULL) return (ESRCH); if (oldp == NULL) { @@ -1920,7 +1925,7 @@ sysctl_proc_nobroadcastkill(int *name, u return (EINVAL); pid = name[0]; - if ((findpr = prfind(pid)) == NULL) + if ((findpr = prfind(cp->p_p, pid)) == NULL) return (ESRCH); /* Either system process or exiting/zombie */ @@ -1981,7 +1986,7 @@ sysctl_proc_vmmap(int *name, u_int namel /* Self process mapping. */ findpr = cp->p_p; } else if (pid > 0) { - if ((findpr = prfind(pid)) == NULL) + if ((findpr = prfind(cp->p_p, pid)) == NULL) return (ESRCH); /* Either system process or exiting/zombie */ Index: sys/kern/kern_time.c =================================================================== RCS file: /cvs/src/sys/kern/kern_time.c,v retrieving revision 1.94 diff -u -p -r1.94 kern_time.c --- sys/kern/kern_time.c 9 Oct 2015 01:10:27 -0000 1.94 +++ sys/kern/kern_time.c 30 Oct 2015 02:52:22 -0000 @@ -139,7 +139,8 @@ clock_gettime(struct proc *p, clockid_t default: /* check for clock from pthread_getcpuclockid() */ if (__CLOCK_TYPE(clock_id) == CLOCK_THREAD_CPUTIME_ID) { - q = pfind(__CLOCK_PTID(clock_id) - THREAD_PID_OFFSET); + q = pfind(p->p_p, + __CLOCK_PTID(clock_id) - THREAD_PID_OFFSET); if (q == NULL || q->p_p != p->p_p) return (ESRCH); *tp = q->p_tu.tu_runtime; @@ -232,7 +233,8 @@ sys_clock_getres(struct proc *p, void *v default: /* check for clock from pthread_getcpuclockid() */ if (__CLOCK_TYPE(clock_id) == CLOCK_THREAD_CPUTIME_ID) { - q = pfind(__CLOCK_PTID(clock_id) - THREAD_PID_OFFSET); + q = pfind(p->p_p, + __CLOCK_PTID(clock_id) - THREAD_PID_OFFSET); if (q == NULL || q->p_p != p->p_p) 
return (ESRCH); ts.tv_sec = 0; Index: sys/kern/sys_generic.c =================================================================== RCS file: /cvs/src/sys/kern/sys_generic.c,v retrieving revision 1.108 diff -u -p -r1.108 sys_generic.c --- sys/kern/sys_generic.c 18 Oct 2015 05:26:55 -0000 1.108 +++ sys/kern/sys_generic.c 30 Oct 2015 02:52:22 -0000 @@ -493,7 +493,7 @@ sys_ioctl(struct proc *p, void *v, regis if (tmp <= 0) { tmp = -tmp; } else { - struct process *pr = prfind(tmp); + struct process *pr = prfind(p->p_p, tmp); if (pr == NULL) { error = ESRCH; break; @@ -786,7 +786,7 @@ selrecord(struct proc *selector, struct mypid = selector->p_pid; if (sip->si_selpid == mypid) return; - if (sip->si_selpid && (p = pfind(sip->si_selpid)) && + if (sip->si_selpid && (p = pfind(selector->p_p, sip->si_selpid)) && p->p_wchan == (caddr_t)&selwait) sip->si_flags |= SI_COLL; else @@ -810,7 +810,7 @@ selwakeup(struct selinfo *sip) sip->si_flags &= ~SI_COLL; wakeup(&selwait); } - p = pfind(sip->si_selpid); + p = pfind(&process0, sip->si_selpid); sip->si_selpid = 0; if (p != NULL) { SCHED_LOCK(s); Index: sys/kern/sys_process.c =================================================================== RCS file: /cvs/src/sys/kern/sys_process.c,v retrieving revision 1.68 diff -u -p -r1.68 sys_process.c --- sys/kern/sys_process.c 24 Sep 2015 20:35:18 -0000 1.68 +++ sys/kern/sys_process.c 30 Oct 2015 02:52:22 -0000 @@ -126,7 +126,7 @@ sys_ptrace(struct proc *p, void *v, regi case PT_GET_THREAD_NEXT: default: /* Find the process we're supposed to be operating on. 
*/
-		if ((t = pfind(SCARG(uap, pid))) == NULL)
+		if ((t = pfind(p->p_p, SCARG(uap, pid))) == NULL)
 			return (ESRCH);
 		if (t->p_flag & P_THREAD)
 			return (ESRCH);
@@ -153,11 +153,11 @@ sys_ptrace(struct proc *p, void *v, regi
 	case PT_SETXMMREGS:
 #endif
 		if (SCARG(uap, pid) > THREAD_PID_OFFSET) {
-			t = pfind(SCARG(uap, pid) - THREAD_PID_OFFSET);
+			t = pfind(p->p_p, SCARG(uap, pid) - THREAD_PID_OFFSET);
 			if (t == NULL)
 				return (ESRCH);
 		} else {
-			if ((t = pfind(SCARG(uap, pid))) == NULL)
+			if ((t = pfind(p->p_p, SCARG(uap, pid))) == NULL)
 				return (ESRCH);
 			if (t->p_flag & P_THREAD)
 				return (ESRCH);
@@ -314,7 +314,7 @@ sys_ptrace(struct proc *p, void *v, regi
 		if (error)
 			return (error);
 
-		t = pfind(pts.pts_tid - THREAD_PID_OFFSET);
+		t = pfind(p->p_p, pts.pts_tid - THREAD_PID_OFFSET);
 		if (t == NULL || ISSET(t->p_flag, P_WEXIT))
 			return (ESRCH);
 		if (t->p_p != tr)
@@ -499,7 +499,7 @@ sys_ptrace(struct proc *p, void *v, regi
 		if (tr->ps_oppid != tr->ps_pptr->ps_pid) {
 			struct process *ppr;
 
-			ppr = prfind(tr->ps_oppid);
+			ppr = prfind(&process0, tr->ps_oppid);
 			proc_reparent(tr, ppr ? ppr : initprocess);
 		}
 
Index: sys/kern/sys_zones.c
===================================================================
RCS file: sys/kern/sys_zones.c
diff -N sys/kern/sys_zones.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ sys/kern/sys_zones.c 30 Oct 2015 02:52:22 -0000
@@ -0,0 +1,417 @@
+/* $OpenBSD$ */
+
+/*
+ * Copyright (c) 2015 David Gwynne <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/zones.h>
+
+#include <sys/socket.h>
+#include <sys/mount.h>
+#include <sys/syscallargs.h>
+
+#include <sys/types.h>
+#include <sys/errno.h>
+
+#include <sys/malloc.h>
+#include <sys/pool.h>
+#include <sys/rwlock.h>
+#include <sys/atomic.h>
+#include <sys/tree.h>
+
+struct zone {
+	zoneid_t z_id;
+	u_int z_refs;
+	char *z_name;
+	size_t z_namelen;
+
+	RB_ENTRY(zone) z_entry;
+};
+
+struct zone zone_global = {
+	0,
+	1,
+	"global"
+};
+
+struct zone * const global_zone = &zone_global;
+
+RB_HEAD(zone_tree, zone);
+
+struct {
+	struct rwlock zs_lock;
+	struct zone_tree zs_tree;
+	struct pool zs_pool;
+} zones = {
+	RWLOCK_INITIALIZER("zones"),
+	RB_INITIALIZER(&zones.zs_tree),
+};
+
+static inline int
+zone_cmp(struct zone *a, struct zone *b)
+{
+	if (a->z_id < b->z_id)
+		return (-1);
+	if (a->z_id > b->z_id)
+		return (1);
+	return (0);
+}
+
+int zone_global_list(zoneid_t **, size_t *);
+
+RB_PROTOTYPE(zone_tree, zone, z_entry, zone_cmp);
+
+/*
+ * Set up the zone pool and insert the global zone into the zones tree.
+ */
+void
+zone_boot(void)
+{
+	pool_init(&zones.zs_pool, sizeof(struct zone), 0, 0, PR_WAITOK,
+	    "zonepl", NULL);
+	pool_setipl(&zones.zs_pool, IPL_NONE);
+	pool_sethardlimit(&zones.zs_pool, MAXZONES - 1,
+	    "zones limit reached", 1);
+
+	global_zone->z_namelen = strlen(global_zone->z_name);
+	RB_INSERT(zone_tree, &zones.zs_tree, zone_ref(global_zone));
+}
+
+/*
+ * zone_create(2): create a zone called zonename and return its id.
+ * Only the superuser in the global zone may create zones.
+ */
+int
+sys_zone_create(struct proc *p, void *v, register_t *retval)
+{
+	struct sys_zone_create_args /* {
+		const char *zonename;
+	} */ *uap = v;
+	struct zone *zone, *newzone;
+	char zonename[MAXZONENAMELEN];
+	size_t zonenamelen;
+	int rv;
+
+	*retval = -1;
+
+	if (p->p_p->ps_zone != global_zone || suser(p, 0) != 0)
+		return (EPERM);
+
+	rv = copyinstr(SCARG(uap, zonename), zonename,
+	    sizeof(zonename), &zonenamelen);
+	if (rv != 0)
+		return (rv);
+
+	newzone = pool_get(&zones.zs_pool, PR_WAITOK | PR_LIMITFAIL);
+	if (newzone == NULL)
+		return (ERANGE);
+
+	newzone->z_refs = 1; /* give one to the zones tree */
+	newzone->z_namelen = zonenamelen;
+	/* size the copy from newzone; zone is only set by the loop below */
+	newzone->z_name = malloc(newzone->z_namelen, M_DEVBUF, M_WAITOK);
+	memcpy(newzone->z_name, zonename, newzone->z_namelen);
+
+	rw_enter_write(&zones.zs_lock);
+	RB_FOREACH(zone, zone_tree, &zones.zs_tree) { /* O(n) */
+		if (strcmp(zone->z_name, zonename) == 0) {
+			rw_exit_write(&zones.zs_lock);
+			free(newzone->z_name, M_DEVBUF, newzone->z_namelen);
+			pool_put(&zones.zs_pool, newzone);
+			return (EEXIST);
+		}
+	}
+
+	do
+		newzone->z_id = arc4random() % MAXZONEIDS;
+	while (RB_INSERT(zone_tree, &zones.zs_tree, newzone) != NULL);
+
+	*retval = newzone->z_id;
+	rw_exit_write(&zones.zs_lock);
+
+	return (0);
+}
+
+/*
+ * Find a zone by id. The zone is returned with a new reference that the
+ * caller must drop with zone_unref(); NULL means there is no such zone.
+ */
+struct zone *
+zone_lookup(zoneid_t z)
+{
+	struct zone key;
+	struct zone *zone;
+
+	key.z_id = z;
+
+	rw_enter_read(&zones.zs_lock);
+	zone = RB_FIND(zone_tree, &zones.zs_tree, &key);
+	if (zone != NULL)
+		zone_ref(zone);
+	rw_exit_read(&zones.zs_lock);
+
+	return (zone);
+}
+
+struct zone *
+zone_ref(struct zone *zone)
+{
+	atomic_inc_int(&zone->z_refs);
+	return (zone);
+}
+
+void
+zone_unref(struct zone *zone)
+{
+	atomic_dec_int(&zone->z_refs);
+}
+
+/*
+ * A process may see a target process if it is in the global zone or if
+ * both processes are in the same zone.
+ */
+int
+zone_visible(struct process *self, struct process *target)
+{
+	struct zone *zone = self->ps_zone;
+
+	return (zone == global_zone || zone == target->ps_zone);
+}
+
+zoneid_t
+zone_id(const struct zone *zone)
+{
+	return (zone->z_id);
+}
+
+/*
+ * zone_destroy(2): remove zone z. Fails with EBUSY unless the zones tree
+ * holds the only remaining reference to the zone.
+ */
+int
+sys_zone_destroy(struct proc *p, void *v, register_t *retval)
+{
+	struct sys_zone_destroy_args /* {
+		zoneid_t z;
+	} */ *uap = v;
+	struct zone key;
+	struct zone *zone;
+	int rv;
+
+	*retval = -1;
+	if (p->p_p->ps_zone != global_zone || suser(p, 0) != 0)
+		return (EPERM);
+
+	key.z_id = SCARG(uap, z);
+
+	rw_enter_write(&zones.zs_lock);
+	zone = RB_FIND(zone_tree, &zones.zs_tree, &key);
+	if (zone == NULL) {
+		rv = ESRCH;
+		goto fail;
+	}
+
+	if (atomic_cas_uint(&zone->z_refs, 1, 0) != 1) {
+		rv = EBUSY;
+		goto fail;
+	}
+
+	RB_REMOVE(zone_tree, &zones.zs_tree, zone);
+	rw_exit_write(&zones.zs_lock);
+
+	free(zone->z_name, M_DEVBUF, zone->z_namelen);
+	pool_put(&zones.zs_pool, zone);
+
+	*retval = 0;
+	return (0);
+
+fail:
+	rw_exit_write(&zones.zs_lock);
+	return (rv);
+}
+
+/*
+ * zone_enter(2): move the calling process from the global zone into
+ * zone z. The reference taken by zone_lookup() is kept by the process.
+ */
+int
+sys_zone_enter(struct proc *p, void *v, register_t *retval)
+{
+	struct sys_zone_enter_args /* {
+		zoneid_t z;
+	} */ *uap = v;
+	struct zone *zone;
+
+	*retval = -1;
+	if (p->p_p->ps_zone != global_zone || suser(p, 0) != 0)
+		return (EPERM);
+
+	zone = zone_lookup(SCARG(uap, z));
+	if (zone == NULL)
+		return (ESRCH);
+
+	if (atomic_cas_ptr(&p->p_p->ps_zone, global_zone, zone) !=
+	    global_zone) {
+		zone_unref(zone);
+		return (EPERM);
+	}
+	/* we're giving the zone_lookup ref to this process */
+
+	zone_unref(global_zone); /* drop gz ref */
+
+	*retval = 0;
+	return (0);
+}
+
+/*
+ * Build an array holding the id of every zone in the tree. Called by
+ * sys_zone_list() with zs_lock held. On entry *nzsp is the number of ids
+ * the caller has room for; on success *zsp and *nzsp describe the new
+ * array, which the caller frees.
+ */
+int
+zone_global_list(zoneid_t **zsp, size_t *nzsp)
+{
+	struct zone *zone;
+	zoneid_t *zs;
+	size_t nzs;
+	size_t i = 0;
+
+	/*
+	 * Count the tree itself. The pool's pr_nout also counts zones
+	 * that sys_zone_create() has allocated but not yet inserted and
+	 * zones that sys_zone_destroy() has removed but not yet freed,
+	 * so it can disagree with the tree and trip the KASSERT below.
+	 */
+	nzs = 0;
+	RB_FOREACH(zone, zone_tree, &zones.zs_tree)
+		nzs++;
+	if (nzs > *nzsp)
+		return (ERANGE);
+
+	zs = mallocarray(nzs, sizeof(*zs), M_TEMP, M_WAITOK);
+
+	RB_FOREACH(zone, zone_tree, &zones.zs_tree)
+		zs[i++] = zone->z_id;
+
+	KASSERT(i == nzs);
+
+	*zsp = zs;
+	*nzsp = nzs;
+
+	return (0);
+}
+
+/*
+ * zone_list(2): copy out the ids of the zones the caller can see and
+ * the number of ids written. The global zone sees every zone, any other
+ * zone only sees itself. ERANGE means the caller's array is too small.
+ */
+int
+sys_zone_list(struct proc *p, void *v, register_t *retval)
+{
+	struct sys_zone_list_args /* {
+		zoneid_t *zs;
+		size_t *nzs;
+	} */ *uap = v;
+
+	struct zone *zone;
+	zoneid_t *zs;
+	zoneid_t *zsp = SCARG(uap, zs);
+	size_t nzs;
+	size_t *nzsp = SCARG(uap, nzs);
+	int rv;
+
+	*retval = -1;
+
+	zone = p->p_p->ps_zone;
+
+	rv = copyin(nzsp, &nzs, sizeof(nzs));
+	if (rv != 0)
+		return (rv);
+
+	if (zone == global_zone) {
+		rw_enter_read(&zones.zs_lock);
+		rv = zone_global_list(&zs, &nzs);
+		rw_exit_read(&zones.zs_lock);
+		if (rv != 0)
+			return (rv);
+
+		rv = copyout(zs, zsp, nzs * sizeof(zoneid_t));
+		free(zs, M_TEMP, nzs * sizeof(zoneid_t));
+		if (rv != 0)
+			return (rv);
+	} else {
+		if (nzs < 1)
+			return (ERANGE);
+		nzs = 1;
+		rv = copyout(&zone->z_id, zsp, sizeof(zoneid_t));
+		if (rv != 0)
+			return (rv);
+	}
+
+	rv = copyout(&nzs, nzsp, sizeof(nzs));
+	if (rv != 0)
+		return (rv);
+
+	*retval = 0;
+	return (0);
+}
+
+
+/*
+ * zone_name(2): copy out the name of zone z. Processes outside the
+ * global zone may only look up their own zone.
+ */
+int
+sys_zone_name(struct proc *p, void *v, register_t *retval)
+{
+	struct sys_zone_name_args /* {
+		zoneid_t z;
+		char *name;
+		size_t namelen;
+	} */ *uap = v;
+	struct zone *zone;
+	zoneid_t z;
+	int rv;
+
+	*retval = -1;
+
+	zone = p->p_p->ps_zone;
+	z = SCARG(uap, z);
+
+	if (zone == global_zone) {
+		zone = zone_lookup(z);
+		if (zone == NULL)
+			return (ESRCH);
+	} else if (zone->z_id != z)
+		return (ESRCH);
+	else
+		zone_ref(zone);
+
+	rv = copyoutstr(zone->z_name,
+	    SCARG(uap, name), SCARG(uap, namelen), NULL);
+	zone_unref(zone);
+	if (rv != 0)
+		return (EFAULT);
+
+	*retval = 0;
+	return (0);
+}
+
+RB_GENERATE(zone_tree, zone, z_entry, zone_cmp);
Index: sys/kern/syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.c,v
retrieving revision 1.176
diff -u -p -r1.176 syscalls.c
--- sys/kern/syscalls.c 28 Oct 2015 12:04:06 -0000 1.176
+++ sys/kern/syscalls.c 30 Oct 2015 02:52:22 -0000
@@ -1,4 +1,8 @@
 /* $OpenBSD: syscalls.c,v 1.176 2015/10/28 12:04:06 deraadt Exp $ */
 
 /*
  * System call names.
@@ -393,4 +397,9 @@ char *syscallnames[] = { "#328 (obsolete __tfork51)", /* 328 = obsolete __tfork51 */ "__set_tcb", /* 329 = __set_tcb */ "__get_tcb", /* 330 = __get_tcb */ + "zone_create", /* 331 = zone_create */ + "zone_destroy", /* 332 = zone_destroy */ + "zone_enter", /* 333 = zone_enter */ + "zone_list", /* 334 = zone_list */ + "zone_name", /* 335 = zone_name */ }; Index: sys/kern/syscalls.master =================================================================== RCS file: /cvs/src/sys/kern/syscalls.master,v retrieving revision 1.164 diff -u -p -r1.164 syscalls.master --- sys/kern/syscalls.master 28 Oct 2015 12:03:39 -0000 1.164 +++ sys/kern/syscalls.master 30 Oct 2015 02:52:22 -0000 @@ -561,3 +561,9 @@ 328 OBSOL __tfork51 329 STD NOLOCK { void sys___set_tcb(void *tcb); } 330 STD NOLOCK { void *sys___get_tcb(void); } +331 STD NOLOCK { zoneid_t sys_zone_create(const char *zonename); } +332 STD NOLOCK { int sys_zone_destroy(zoneid_t z); } +333 STD NOLOCK { int sys_zone_enter(zoneid_t z); } +334 STD NOLOCK { int sys_zone_list(zoneid_t *zs, size_t *nzs); } +335 STD NOLOCK { int sys_zone_name(zoneid_t z, \ + char *name, size_t namelen); } Index: sys/sys/_types.h =================================================================== RCS file: /cvs/src/sys/sys/_types.h,v retrieving revision 1.9 diff -u -p -r1.9 _types.h --- sys/sys/_types.h 22 Aug 2014 23:05:15 -0000 1.9 +++ sys/sys/_types.h 30 Oct 2015 02:52:22 -0000 @@ -64,6 +64,7 @@ typedef __int32_t __swblk_t; /* swap off typedef __int64_t __time_t; /* epoch time */ typedef __int32_t __timer_t; /* POSIX timer identifiers */ typedef __uint32_t __uid_t; /* user id */ +typedef __int32_t __zoneid_t; /* zone id */ typedef __uint32_t __useconds_t; /* microseconds */ /* Index: sys/sys/proc.h =================================================================== RCS file: /cvs/src/sys/sys/proc.h,v retrieving revision 1.210 diff -u -p -r1.210 proc.h --- sys/sys/proc.h 25 Oct 2015 20:39:54 -0000 1.210 +++ sys/sys/proc.h 30 Oct 
2015 02:52:22 -0000 @@ -84,6 +84,7 @@ struct exec_package; struct proc; struct ps_strings; struct uvm_object; +struct zone; struct whitepaths; union sigval; @@ -167,6 +168,7 @@ struct process { struct vnode *ps_textvp; /* Vnode of executable. */ struct filedesc *ps_fd; /* Ptr to open files structure */ struct vmspace *ps_vmspace; /* Address space */ + struct zone *ps_zone; /* Zone reference */ /* The following fields are all zeroed upon creation in process_new. */ #define ps_startzero ps_klist @@ -451,6 +453,8 @@ extern u_long pidhash; extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl; extern u_long pgrphash; +extern struct zone * const global_zone; /* Address of the global zone */ + extern struct proc proc0; /* Process slot for swapper. */ extern struct process process0; /* Process slot for kernel threads. */ extern int nprocesses, maxprocess; /* Cur and max number of processes. */ @@ -478,8 +482,8 @@ int ispidtaken(pid_t); pid_t allocpid(void); void freepid(pid_t); -struct process *prfind(pid_t); /* Find process by id. */ -struct proc *pfind(pid_t); /* Find thread by id. */ +struct process *prfind(struct process *, pid_t); /* Find process by id. */ +struct proc *pfind(struct process *, pid_t); /* Find thread by id. */ struct pgrp *pgfind(pid_t); /* Find process group by id. */ void proc_printit(struct proc *p, const char *modif, int (*pr)(const char *, ...)); Index: sys/sys/syscall.h =================================================================== RCS file: /cvs/src/sys/sys/syscall.h,v retrieving revision 1.175 diff -u -p -r1.175 syscall.h --- sys/sys/syscall.h 28 Oct 2015 12:04:06 -0000 1.175 +++ sys/sys/syscall.h 30 Oct 2015 02:52:22 -0000 @@ -1,4 +1,8 @@ /* $OpenBSD: syscall.h,v 1.175 2015/10/28 12:04:06 deraadt Exp $ */ /* * System call numbers. 
@@ -696,4 +700,19 @@ /* syscall: "__get_tcb" ret: "void *" args: */ #define SYS___get_tcb 330 -#define SYS_MAXSYSCALL 331 +/* syscall: "zone_create" ret: "zoneid_t" args: "const char *" */ +#define SYS_zone_create 331 + +/* syscall: "zone_destroy" ret: "int" args: "zoneid_t" */ +#define SYS_zone_destroy 332 + +/* syscall: "zone_enter" ret: "int" args: "zoneid_t" */ +#define SYS_zone_enter 333 + +/* syscall: "zone_list" ret: "int" args: "zoneid_t *" "size_t *" */ +#define SYS_zone_list 334 + +/* syscall: "zone_name" ret: "int" args: "zoneid_t" "char *" "size_t" */ +#define SYS_zone_name 335 + +#define SYS_MAXSYSCALL 336 Index: sys/sys/syscallargs.h =================================================================== RCS file: /cvs/src/sys/sys/syscallargs.h,v retrieving revision 1.178 diff -u -p -r1.178 syscallargs.h --- sys/sys/syscallargs.h 28 Oct 2015 12:04:06 -0000 1.178 +++ sys/sys/syscallargs.h 30 Oct 2015 02:52:22 -0000 @@ -1,4 +1,8 @@ /* $OpenBSD: syscallargs.h,v 1.178 2015/10/28 12:04:06 deraadt Exp $ */ /* * System call argument lists. @@ -1081,6 +1085,29 @@ struct sys___set_tcb_args { syscallarg(void *) tcb; }; +struct sys_zone_create_args { + syscallarg(const char *) zonename; +}; + +struct sys_zone_destroy_args { + syscallarg(zoneid_t) z; +}; + +struct sys_zone_enter_args { + syscallarg(zoneid_t) z; +}; + +struct sys_zone_list_args { + syscallarg(zoneid_t *) zs; + syscallarg(size_t *) nzs; +}; + +struct sys_zone_name_args { + syscallarg(zoneid_t) z; + syscallarg(char *) name; + syscallarg(size_t) namelen; +}; + /* * System call prototypes. 
*/ @@ -1327,3 +1354,8 @@ int sys_symlinkat(struct proc *, void *, int sys_unlinkat(struct proc *, void *, register_t *); int sys___set_tcb(struct proc *, void *, register_t *); int sys___get_tcb(struct proc *, void *, register_t *); +int sys_zone_create(struct proc *, void *, register_t *); +int sys_zone_destroy(struct proc *, void *, register_t *); +int sys_zone_enter(struct proc *, void *, register_t *); +int sys_zone_list(struct proc *, void *, register_t *); +int sys_zone_name(struct proc *, void *, register_t *); Index: sys/sys/sysctl.h =================================================================== RCS file: /cvs/src/sys/sys/sysctl.h,v retrieving revision 1.157 diff -u -p -r1.157 sysctl.h --- sys/sys/sysctl.h 23 Oct 2015 10:22:30 -0000 1.157 +++ sys/sys/sysctl.h 30 Oct 2015 02:52:22 -0000 @@ -441,6 +441,7 @@ struct kinfo_proc { u_int64_t p_vm_map_size; /* VSIZE_T: virtual size */ int32_t p_tid; /* PID_T: Thread identifier. */ u_int32_t p_rtableid; /* U_INT: Routing table identifier. 
*/ + int32_t p_zoneid; /* ZONEID_T: zone identifier */ }; /* @@ -648,6 +649,7 @@ do { \ \ (kp)->p_cpuid = KI_NOCPU; \ (kp)->p_rtableid = (pr)->ps_rtableid; \ + (kp)->p_zoneid = zone_id((pr)->ps_zone); \ } while (0) #endif /* defined(_KERNEL) || defined(_LIBKVM) */ @@ -941,7 +943,7 @@ int sysctl_rdstring(void *, size_t *, vo int sysctl_rdstruct(void *, size_t *, void *, const void *, int); int sysctl_struct(void *, size_t *, void *, size_t, void *, int); int sysctl_file(int *, u_int, char *, size_t *, struct proc *); -int sysctl_doproc(int *, u_int, char *, size_t *); +int sysctl_doproc(int *, u_int, char *, size_t *, struct proc *); struct rtentry; struct walkarg; int sysctl_dumpentry(struct rtentry *, void *, unsigned int); Index: sys/sys/types.h =================================================================== RCS file: /cvs/src/sys/sys/types.h,v retrieving revision 1.44 diff -u -p -r1.44 types.h --- sys/sys/types.h 4 Sep 2015 23:47:09 -0000 1.44 +++ sys/sys/types.h 30 Oct 2015 02:52:22 -0000 @@ -147,6 +147,7 @@ typedef __rlim_t rlim_t; /* resource li typedef __segsz_t segsz_t; /* segment size */ typedef __swblk_t swblk_t; /* swap offset */ typedef __uid_t uid_t; /* user id */ +typedef __zoneid_t zoneid_t; /* zone id */ typedef __useconds_t useconds_t; /* microseconds */ typedef __suseconds_t suseconds_t; /* microseconds (signed) */ typedef __fsblkcnt_t fsblkcnt_t; /* file system block count */ Index: sys/sys/zones.h =================================================================== RCS file: sys/sys/zones.h diff -N sys/sys/zones.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/sys/zones.h 30 Oct 2015 02:52:22 -0000 @@ -0,0 +1,33 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2015 David Gwynne <[email protected]> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _SYS_ZONES_H_
+#define _SYS_ZONES_H_
+
+#define MAXZONENAMELEN 256 /* max zone name length w/ NUL */
+#define MAXZONES 1024
+#define MAXZONEIDS (MAXZONES * 32)
+
+#ifdef _KERNEL
+int zone_visible(struct process *, struct process *);
+struct zone * zone_ref(struct zone *);
+void zone_unref(struct zone *);
+zoneid_t zone_id(const struct zone *);
+#endif
+
+#endif /* _SYS_ZONES_H_ */
Index: usr.bin/pkill/pkill.c
===================================================================
RCS file: /cvs/src/usr.bin/pkill/pkill.c,v
retrieving revision 1.38
diff -u -p -r1.38 pkill.c
--- usr.bin/pkill/pkill.c 11 Oct 2015 03:08:20 -0000 1.38
+++ usr.bin/pkill/pkill.c 30 Oct 2015 02:52:23 -0000
@@ -52,6 +52,7 @@
 #include <pwd.h>
 #include <grp.h>
 #include <errno.h>
+#include <zones.h>
 
 #define STATUS_MATCH 0
 #define STATUS_NOMATCH 1
@@ -108,6 +109,7 @@ int grepact(struct kinfo_proc *, int);
 void makelist(struct listhead *, enum listtype, char *);
 char *getargv(struct kinfo_proc *);
 int askyn(struct kinfo_proc *);
+zoneid_t getzoneid(const char *);
 
 extern char *__progname;
 
@@ -151,6 +153,8 @@ main(int argc, char **argv)
 	u_int32_t bestsec, bestusec;
 	regex_t reg;
 	regmatch_t regmatch;
+	const char *zone = NULL;
+	zoneid_t z = -1;
 
 	if (strcmp(__progname, "pgrep") == 0) {
 		action = grepact;
@@ -183,7 +187,7 @@ main(int argc, char **argv)
 
 	criteria = 0;
 
-	while ((ch = getopt(argc, argv, "G:P:T:U:d:fg:Ilnoqs:t:u:vx")) != -1)
+	while ((ch = getopt(argc, argv, "G:P:T:U:d:fg:Ilnoqs:t:u:vxz:")) != -1)
 		switch (ch) {
 		case 'G':
 			makelist(&rgidlist, LT_GROUP, optarg);
@@ -248,6 +252,9 @@ main(int argc, char **argv)
 		case 'x':
 			fullmatch = 1;
 			break;
+		case 'z':
+			zone = optarg;
+			break;
 		default:
 			usage();
 			/* NOTREACHED */
@@ -262,6 +269,9 @@ main(int argc, char **argv)
 
 	mypid = getpid();
 
+	if (zone != NULL)
+		z = getzoneid(zone);
+
 	/*
 	 * Retrieve the list of running processes from the kernel.
 	 */
@@ -301,6 +311,7 @@ main(int argc, char **argv)
 
 		for (i = 0, kp = plist; i < nproc; i++, kp++) {
 			if ((kp->p_flag & (P_SYSTEM | P_THREAD)) != 0 ||
+			    (z != -1 && z != kp->p_zoneid) ||
 			    kp->p_pid == mypid)
 				continue;
 
@@ -328,6 +339,7 @@ main(int argc, char **argv)
 
 	for (i = 0, kp = plist; i < nproc; i++, kp++) {
 		if ((kp->p_flag & (P_SYSTEM | P_THREAD)) != 0 ||
+		    (z != -1 && z != kp->p_zoneid) ||
 		    kp->p_pid == mypid)
 			continue;
 
@@ -439,6 +451,7 @@ main(int argc, char **argv)
 	rv = STATUS_NOMATCH;
 	for (i = 0, j = 0, kp = plist; i < nproc; i++, kp++) {
 		if ((kp->p_flag & (P_SYSTEM | P_THREAD)) != 0 ||
+		    (z != -1 && z != kp->p_zoneid) ||
 		    kp->p_pid == mypid)
 			continue;
 		if (selected[i] == inverse)
@@ -474,7 +487,8 @@ usage(void)
 		ustr = "[-signal] [-fIlnoqvx]";
 
 	fprintf(stderr, "usage: %s %s [-G gid] [-g pgrp] [-P ppid] [-s sid]"
-	    "\n\t[-T rtable] [-t tty] [-U uid] [-u euid] [pattern ...]\n",
+	    "\n\t[-T rtable] [-t tty] [-U uid] [-u euid] [-z zone] "
+	    "[pattern ...]\n",
 	    __progname, ustr);
 
 	exit(STATUS_BADUSAGE);
@@ -636,3 +650,53 @@ makelist(struct listhead *head, enum lis
 	if (empty)
 		usage();
 }
+
+/*
+ * Resolve a zone name to its id. A string that names no visible zone
+ * is tried as a numeric zone id; exits if that fails too.
+ */
+zoneid_t
+getzoneid(const char *zone)
+{
+	char zonename[MAXZONENAMELEN];
+	const char *errstr;
+	zoneid_t *zs = NULL;
+	size_t nzs, i = 8;
+	zoneid_t z;
+
+	for (;;) {
+		nzs = i;
+
+		zs = reallocarray(zs, nzs, sizeof(*zs));
+		if (zs == NULL)
+			err(1, "zone lookup");
+
+		if (zone_list(zs, &nzs) == 0)
+			break;
+
+		/* a too small array is reported with ERANGE: grow and retry */
+		if (errno != ERANGE)
+			err(1, "zone list");
+
+		i <<= 1;
+	}
+
+	for (i = 0; i < nzs; i++) {
+		z = zs[i];
+		if (zone_name(z, zonename, sizeof(zonename)) == -1)
+			err(1, "zone name");
+		if (strcmp(zone, zonename) == 0) {
+			free(zs);
+			return (z);
+		}
+	}
+
+	free(zs);
+
+	z = strtonum(zone, 0, MAXZONEIDS, &errstr);
+	if (errstr != NULL)
+		errx(1, "unknown zone \"%s\"", zone);
+
+	return (z);
+}
+
diff -rNu /var/empty/Makefile usr.sbin/zone/Makefile
--- /var/empty/Makefile Thu Jan 1 10:00:00 1970
+++ usr.sbin/zone/Makefile Thu Aug 13 13:44:35 2015
@@ -0,0 +1,7 @@
+
+PROG=zone
+SRCS=zone.c
+MAN=
+CFLAGS+=-Wall
+
+.include <bsd.prog.mk>
diff -rNu /var/empty/zone.c usr.sbin/zone/zone.c
--- /var/empty/zone.c Thu Jan 1 10:00:00 1970
+++ usr.sbin/zone/zone.c Fri Oct 30 12:58:44 2015
@@ -0,0 +1,248 @@
+/* $OpenBSD$ */
+
+/*
+ * Copyright (c) 2015 David Gwynne <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <err.h>
+#include <errno.h>
+#include <zones.h>
+
+#ifndef nitems
+#define nitems(_a) (sizeof(_a) / sizeof(_a[0]))
+#endif
+
+int zcreate(int, char *[]);
+int zdestroy(int, char *[]);
+int zexec(int, char *[]);
+int zlist(int, char *[]);
+
+__dead void usage(void);
+
+struct task {
+	const char *name;
+	int (*task)(int, char *[]);
+};
+
+/* must be sorted alphanumerically */
+struct task tasks[] = {
+	{ "create", zcreate },
+	{ "destroy", zdestroy },
+	{ "exec", zexec },
+	{ "list", zlist }
+};
+
+int task_cmp(const void *, const void *);
+
+/* bsearch(3) comparator: order tasks by name. */
+int
+task_cmp(const void *a, const void *b)
+{
+	const struct task *ta = a;
+	const struct task *tb = b;
+
+	return (strcmp(ta->name, tb->name));
+}
+
+__dead void
+usage(void)
+{
+	extern char *__progname;
+
+	fprintf(stderr, "usage:\t%s create zonename\n", __progname);
+	fprintf(stderr, "\t%s destroy zonename\n", __progname);
+	fprintf(stderr, "\t%s exec zonename command ...\n", __progname);
+	fprintf(stderr, "\t%s list\n", __progname);
+
+	exit(1);
+}
+
+/* Dispatch to the sub-command named by argv[1]. */
+int
+main(int argc, char *argv[])
+{
+	struct task key, *t;
+
+	if (argc < 2)
+		usage();
+
+	key.name = argv[1];
+	t = bsearch(&key, tasks, nitems(tasks), sizeof(tasks[0]), task_cmp);
+	if (t == NULL)
+		usage();
+
+	argc -= 2;
+	argv += 2;
+
+	return (t->task(argc, argv));
+}
+
+/*
+ * Resolve a zone name to its id. A string that names no visible zone
+ * is tried as a numeric zone id; exits if that fails too.
+ */
+zoneid_t
+getzoneid(const char *zone)
+{
+	char zonename[MAXZONENAMELEN];
+	const char *errstr;
+	zoneid_t *zs = NULL;
+	size_t nzs, i = 8;
+	zoneid_t z;
+
+	for (;;) {
+		nzs = i;
+
+		zs = reallocarray(zs, nzs, sizeof(*zs));
+		if (zs == NULL)
+			err(1, "zone lookup");
+
+		if (zone_list(zs, &nzs) == 0)
+			break;
+
+		/* a too small array is reported with ERANGE: grow and retry */
+		if (errno != ERANGE)
+			err(1, "zone list");
+
+		i <<= 1;
+	}
+
+	for (i = 0; i < nzs; i++) {
+		z = zs[i];
+		if (zone_name(z, zonename, sizeof(zonename)) == -1)
+			err(1, "zone name");
+		if (strcmp(zone, zonename) == 0) {
+			free(zs);
+			return (z);
+		}
+	}
+
+	free(zs);
+
+	z = strtonum(zone, 0, MAXZONEIDS, &errstr);
+	if (errstr != NULL)
+		errx(1, "unknown zone \"%s\"", zone);
+
+	return (z);
+}
+
+
+/* Create a zone with the given name. */
+int
+zcreate(int argc, char *argv[])
+{
+	if (argc != 1)
+		usage();
+
+	if (zone_create(argv[0]) == -1)
+		err(1, "create");
+
+	return (0);
+}
+
+/* Destroy the zone given by name or id. */
+int
+zdestroy(int argc, char *argv[])
+{
+	zoneid_t z;
+
+	if (argc != 1)
+		usage();
+
+	z = getzoneid(argv[0]);
+
+	if (zone_destroy(z) == -1)
+		err(1, "destroy");
+
+	return (0);
+}
+
+/*
+ * Enter the zone given by name or id and exec the command inside it.
+ */
+int
+zexec(int argc, char *argv[])
+{
+	zoneid_t z;
+
+	if (argc < 2)
+		usage();
+
+	z = getzoneid(argv[0]);
+
+	argc -= 1;
+	argv += 1;
+
+	if (zone_enter(z) == -1)
+		err(1, "enter");
+
+	execvp(argv[0], argv);
+
+	err(1, "exec %s", argv[0]);
+	/* NOTREACHED */
+}
+
+/*
+ * Print the id and name of every zone returned by zone_list().
+ */
+int
+zlist(int argc, char *argv[])
+{
+	char zonename[MAXZONENAMELEN];
+	zoneid_t *zs = NULL;
+	size_t nzs, i = 8;
+	zoneid_t z;
+
+	if (argc != 0)
+		usage();
+
+	for (;;) {
+		nzs = i;
+
+		zs = reallocarray(zs, nzs, sizeof(*zs));
+		if (zs == NULL)
+			err(1, "lookup");
+
+		if (zone_list(zs, &nzs) == 0)
+			break;
+
+		/* a too small array is reported with ERANGE: grow and retry */
+		if (errno != ERANGE)
+			err(1, "list");
+
+		i <<= 1;
+	}
+
+	printf("%8s %s\n", "ID", "NAME");
+
+	for (i = 0; i < nzs; i++) {
+		z = zs[i];
+		if (zone_name(z, zonename, sizeof(zonename)) == -1)
+			err(1, "name");
+		printf("%8d %s\n", z, zonename);
+	}
+
+	free(zs);
+
+	return (0);
+}
