Hello,

I'm (still) trying to figure out how a jail-aware SysV IPC mechanism should work.

I want to run PostgreSQL in each jail without changing the UID for each jail.
If you don't use a different UID in each jail, it doesn't work because IPC
objects conflict between jails.
See also:
  https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=48471
  http://www.freebsddiary.org/jail-multiple.php
  https://wiki.freebsd.org/Jails
  https://forums.freebsd.org/threads/postgresql-in-jail.51528/

There is a patch for 4.7-STABLE in Bugzilla (see above) that solves the problem
by completely separating the namespace for each jail in the kernel,
but I couldn't find any (other) implementation that works on recent FreeBSD.
I've also tried to rewrite the patch for recent FreeBSD, but I couldn't get
it to work properly due to my limited kernel knowledge ;(

Anyway, I created (and keep updating) a patch that tries to solve the problem
by simply separating the IPC key_t space for each jail.
The attached patch can be applied to 10-STABLE (or CURRENT?).

After the patch is applied:
- IPC objects created in a parent jail are invisible to its children.
- IPC objects created in neighboring (sibling) jails are invisible to each other.
- IPC objects created in a child jail are VISIBLE from the parent.
- IPC key_t spaces are separated between jails. If you look at a key_t-named
object from the parent, its key is shown as IPC_PRIVATE.

I chose this design; however, I'm not sure it is the right design
for jail-aware IPC.
If you prefer the completely separated namespace approach, that's OK. I want to
focus on how the IPC mechanism deals with the hierarchical jail system.

So I need more feedback. Could you help me, please?
You can dig in and play with ipcs(1)/ipcrm(1) to see what happens in each jail.

Thanks.

--
Kikuchan
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index e9c71ca..cf63196 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
+#include <sys/ipc.h>
 
 #include <net/if.h>
 #include <net/vnet.h>
@@ -2330,6 +2331,9 @@ prison_remove_one(struct prison *pr)
 		pr->pr_flags &= ~PR_PERSIST;
 	}
 
+	/* SysV IPC cleanup for the prison */
+	ipc_cleanup_for_prison(pr);
+
 	/*
 	 * jail_remove added a reference.  If that's the only one, remove
 	 * the prison now.
diff --git a/sys/kern/sysv_ipc.c b/sys/kern/sysv_ipc.c
index e402cb5..9985b87 100644
--- a/sys/kern/sysv_ipc.c
+++ b/sys/kern/sysv_ipc.c
@@ -47,9 +47,13 @@ __FBSDID("$FreeBSD$");
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/ucred.h>
+#include <sys/jail.h>
 
 void (*shmfork_hook)(struct proc *, struct proc *) = NULL;
 void (*shmexit_hook)(struct vmspace *) = NULL;
+void (*sysvshm_cleanup_for_prison_hook)(struct prison *) = NULL;
+void (*sysvmsg_cleanup_for_prison_hook)(struct prison *) = NULL;
+void (*sysvsem_cleanup_for_prison_hook)(struct prison *) = NULL;
 
 /* called from kern_fork.c */
 void
@@ -72,6 +76,19 @@ shmexit(struct vmspace *vm)
 	return;
 }
 
+/* called from kern_jail.c (prison_remove_one) when a jail is removed */
+void
+ipc_cleanup_for_prison(struct prison *pr)
+{
+	/* Each hook is NULL unless its SysV module set it at init time. */
+	if (sysvshm_cleanup_for_prison_hook != NULL)
+		sysvshm_cleanup_for_prison_hook(pr);
+	if (sysvmsg_cleanup_for_prison_hook != NULL)
+		sysvmsg_cleanup_for_prison_hook(pr);
+	if (sysvsem_cleanup_for_prison_hook != NULL)
+		sysvsem_cleanup_for_prison_hook(pr);
+}
+
 /*
  * Check for IPC permission.
  *
diff --git a/sys/kern/sysv_msg.c b/sys/kern/sysv_msg.c
index d58cb7e..68775a6 100644
--- a/sys/kern/sysv_msg.c
+++ b/sys/kern/sysv_msg.c
@@ -68,6 +68,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
+#include <sys/sbuf.h>
 #include <sys/malloc.h>
 #include <sys/jail.h>
 
@@ -80,6 +81,9 @@ static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
 static int msginit(void);
 static int msgunload(void);
 static int sysvmsg_modload(struct module *, int, void *);
+static int cr_cansee_msq(struct ucred *, struct msqid_kernel *);
+static void msq_remove(struct msqid_kernel *);
+static void sysvmsg_cleanup_for_prison_myhook(struct prison *);
 
 
 #ifdef MSG_DEBUG
@@ -257,6 +261,7 @@ msginit()
 #endif
 	}
 	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
+	sysvmsg_cleanup_for_prison_hook = &sysvmsg_cleanup_for_prison_myhook;
 
 	error = syscall_helper_register(msg_syscalls);
 	if (error != 0)
@@ -282,6 +287,7 @@ msgunload()
 #ifdef COMPAT_FREEBSD32
 	syscall32_helper_unregister(msg32_syscalls);
 #endif
+	sysvmsg_cleanup_for_prison_hook = NULL;
 
 	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
 		/*
@@ -372,6 +378,69 @@ msg_freehdr(msghdr)
 #endif
 }
 
+static int
+cr_cansee_msq(struct ucred *cred, struct msqid_kernel *msqkptr)
+{
+	/* Hidden (EINVAL) when cred is unset or prison_check() rejects it. */
+	if (msqkptr->cred != NULL && prison_check(cred, msqkptr->cred) == 0)
+		return (0);
+	return (EINVAL);
+}
+
+/* Destroy a queue: release accounting and cred, free messages, free slot. */
+static void
+msq_remove(struct msqid_kernel *msqkptr)
+{
+	struct msg *msghdr, *msghdr_tmp;
+
+	racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
+	racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
+	racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
+	crfree(msqkptr->cred);
+	msqkptr->cred = NULL;
+
+	/* Free the message headers */
+	msghdr = msqkptr->u.msg_first;
+	while (msghdr != NULL) {
+		/* Free the segments of each message */
+		msqkptr->u.msg_cbytes -= msghdr->msg_ts;
+		msqkptr->u.msg_qnum--;
+		msghdr_tmp = msghdr;
+		msghdr = msghdr->msg_next;
+		msg_freehdr(msghdr_tmp);
+	}
+
+	if (msqkptr->u.msg_cbytes != 0)
+		panic("msg_cbytes is screwed up");
+	if (msqkptr->u.msg_qnum != 0)
+		panic("msg_qnum is screwed up");
+
+	msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
+
+#ifdef MAC
+	/* The MAC hooks for message queues use the "sysvmsq" prefix. */
+	mac_sysvmsq_cleanup(msqkptr);
+#endif
+
+	wakeup(msqkptr);
+}
+
+/* Destroy every allocated message queue whose cred belongs to prison pr. */
+static void
+sysvmsg_cleanup_for_prison_myhook(struct prison *pr)
+{
+	struct msqid_kernel *q;
+	int slot;
+
+	mtx_lock(&msq_mtx);
+	for (slot = 0, q = msqids; slot < msginfo.msgmni; slot++, q++) {
+		if (q->u.msg_qbytes != 0 && q->cred != NULL &&
+		    q->cred->cr_prison == pr)
+			msq_remove(q);
+	}
+	mtx_unlock(&msq_mtx);
+}
+
 #ifndef _SYS_SYSPROTO_H_
 struct msgctl_args {
 	int	msqid;
@@ -433,6 +502,9 @@ kern_msgctl(td, msqid, cmd, msqbuf)
 		error = EINVAL;
 		goto done2;
 	}
+	error = cr_cansee_msq(td->td_ucred, msqkptr);
+	if (error != 0)
+		goto done2;
 #ifdef MAC
 	error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd);
 	if (error != 0)
@@ -446,7 +518,9 @@ kern_msgctl(td, msqid, cmd, msqbuf)
 
 	case IPC_RMID:
 	{
+#ifdef MAC
 		struct msg *msghdr;
+#endif
 		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
 			goto done2;
 
@@ -468,37 +542,7 @@ kern_msgctl(td, msqid, cmd, msqbuf)
 		}
 #endif
 
-		racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
-		racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
-		racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
-		crfree(msqkptr->cred);
-		msqkptr->cred = NULL;
-
-		/* Free the message headers */
-		msghdr = msqkptr->u.msg_first;
-		while (msghdr != NULL) {
-			struct msg *msghdr_tmp;
-
-			/* Free the segments of each message */
-			msqkptr->u.msg_cbytes -= msghdr->msg_ts;
-			msqkptr->u.msg_qnum--;
-			msghdr_tmp = msghdr;
-			msghdr = msghdr->msg_next;
-			msg_freehdr(msghdr_tmp);
-		}
-
-		if (msqkptr->u.msg_cbytes != 0)
-			panic("msg_cbytes is screwed up");
-		if (msqkptr->u.msg_qnum != 0)
-			panic("msg_qnum is screwed up");
-
-		msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
-
-#ifdef MAC
-		mac_sysvmsq_cleanup(msqkptr);
-#endif
-
-		wakeup(msqkptr);
+		msq_remove(msqkptr);
 	}
 
 		break;
@@ -578,6 +622,7 @@ sys_msgget(td, uap)
 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
 			msqkptr = &msqids[msqid];
 			if (msqkptr->u.msg_qbytes != 0 &&
+			    msqkptr->cred && msqkptr->cred->cr_prison == cred->cr_prison &&
 			    msqkptr->u.msg_perm.key == key)
 				break;
 		}
@@ -718,6 +763,8 @@ kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
 		goto done2;
 	}
 
+	if ((error = cr_cansee_msq(td->td_ucred, msqkptr)))
+		goto done2;
 	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
 		DPRINTF(("requester doesn't have write access\n"));
 		goto done2;
@@ -1081,6 +1128,8 @@ kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype)
 		goto done2;
 	}
 
+	if ((error = cr_cansee_msq(td->td_ucred, msqkptr)))
+		goto done2;
 	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
 		DPRINTF(("requester doesn't have read access\n"));
 		goto done2;
@@ -1320,9 +1369,37 @@ sys_msgrcv(td, uap)
 static int
 sysctl_msqids(SYSCTL_HANDLER_ARGS)
 {
+	int error;
+	struct sbuf sb;
+	struct msqid_kernel tmp, empty;
+	int i;
+
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		goto done;
+	sbuf_new_for_sysctl(&sb, NULL, sizeof(struct msqid_kernel) * msginfo.msgmni, req);
+
+	bzero(&empty, sizeof(empty));
+	for (i = 0; i < msginfo.msgmni; i++) {
+		struct msqid_kernel *msqkptr;
+
+		msqkptr = &msqids[i];
+		if (msqkptr->u.msg_qbytes == 0 ||
+		    cr_cansee_msq(req->td->td_ucred, msqkptr)) {
+			msqkptr = &empty;
+		} else if (req->td->td_ucred->cr_prison != msqkptr->cred->cr_prison) {
+			bcopy(msqkptr, &tmp, sizeof(tmp));
+			msqkptr = &tmp;
+			msqkptr->u.msg_perm.key = IPC_PRIVATE;
+		}
 
-	return (SYSCTL_OUT(req, msqids,
-	    sizeof(struct msqid_kernel) * msginfo.msgmni));
+		sbuf_bcat(&sb, msqkptr, sizeof(*msqkptr));
+	}
+	error = sbuf_finish(&sb);
+	sbuf_delete(&sb);
+
+done:
+	return (error);
 }
 
 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
diff --git a/sys/kern/sysv_sem.c b/sys/kern/sysv_sem.c
index f9ff217..5b9e1ca 100644
--- a/sys/kern/sysv_sem.c
+++ b/sys/kern/sysv_sem.c
@@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
+#include <sys/sbuf.h>
 #include <sys/uio.h>
 #include <sys/malloc.h>
 #include <sys/jail.h>
@@ -79,6 +80,9 @@ static int semunload(void);
 static void semexit_myhook(void *arg, struct proc *p);
 static int sysctl_sema(SYSCTL_HANDLER_ARGS);
 static int semvalid(int semid, struct semid_kernel *semakptr);
+static int cr_cansee_sem(struct ucred *cred, struct semid_kernel *semakptr);
+static void sem_remove(int semidx, struct ucred *cred);
+static void sysvsem_cleanup_for_prison_myhook(struct prison *pr);
 
 #ifndef _SYS_SYSPROTO_H_
 struct __semctl_args;
@@ -287,6 +291,7 @@ seminit(void)
 	mtx_init(&sem_undo_mtx, "semu", NULL, MTX_DEF);
 	semexit_tag = EVENTHANDLER_REGISTER(process_exit, semexit_myhook, NULL,
 	    EVENTHANDLER_PRI_ANY);
+	sysvsem_cleanup_for_prison_hook = &sysvsem_cleanup_for_prison_myhook;
 
 	error = syscall_helper_register(sem_syscalls);
 	if (error != 0)
@@ -313,6 +318,7 @@ semunload(void)
 #endif
 	syscall_helper_unregister(sem_syscalls);
 	EVENTHANDLER_DEREGISTER(process_exit, semexit_tag);
+	sysvsem_cleanup_for_prison_hook = NULL;
 #ifdef MAC
 	for (i = 0; i < seminfo.semmni; i++)
 		mac_sysvsem_destroy(&sema[i]);
@@ -506,6 +512,70 @@ semvalid(int semid, struct semid_kernel *semakptr)
 	    semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) ? EINVAL : 0);
 }
 
+static int
+cr_cansee_sem(struct ucred *cred, struct semid_kernel *semakptr)
+{
+	/* Hidden (EINVAL) when cred is unset or prison_check() rejects it. */
+	if (semakptr->cred != NULL && prison_check(cred, semakptr->cred) == 0)
+		return (0);
+	return (EINVAL);
+}
+
+static void
+sem_remove(int semidx, struct ucred *cred)
+{
+	int i;
+	struct semid_kernel *semakptr;
+
+	KASSERT(semidx >= 0 && semidx < seminfo.semmni, ("semidx out of bounds"));
+	semakptr = &sema[semidx];	/* cred may be NULL (prison cleanup) */
+	semakptr->u.sem_perm.cuid = cred ? cred->cr_uid : 0;
+	semakptr->u.sem_perm.uid = cred ? cred->cr_uid : 0;
+	semakptr->u.sem_perm.mode = 0;	/* clears SEM_ALLOC */
+	racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems);
+	crfree(semakptr->cred);
+	semakptr->cred = NULL;
+	SEMUNDO_LOCK();
+	semundo_clear(semidx, -1);
+	SEMUNDO_UNLOCK();
+#ifdef MAC
+	mac_sysvsem_cleanup(semakptr);
+#endif
+	wakeup(semakptr);
+	for (i = 0; i < seminfo.semmni; i++) {	/* lock sets we will shift */
+		if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
+		    sema[i].u.sem_base > semakptr->u.sem_base)
+			mtx_lock_flags(&sema_mtx[i], LOP_DUPOK);
+	}
+	for (i = semakptr->u.sem_base - sem; i < semtot; i++)
+		sem[i] = sem[i + semakptr->u.sem_nsems];	/* compact */
+	for (i = 0; i < seminfo.semmni; i++) {	/* rebase, then unlock */
+		if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
+		    sema[i].u.sem_base > semakptr->u.sem_base) {
+			sema[i].u.sem_base -= semakptr->u.sem_nsems;
+			mtx_unlock(&sema_mtx[i]);
+		}
+	}
+	semtot -= semakptr->u.sem_nsems;
+}
+
+static void
+sysvsem_cleanup_for_prison_myhook(struct prison *pr)
+{
+	int i;
+
+	mtx_lock(&sem_mtx);	/* sem_mtx first, as the IPC_RMID path does */
+	for (i = 0; i < seminfo.semmni; i++) {	/* sets owned by pr */
+		if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
+		    sema[i].cred && sema[i].cred->cr_prison == pr) {
+			mtx_lock(&sema_mtx[i]);
+			sem_remove(i, NULL);	/* NULL: no removing cred */
+			mtx_unlock(&sema_mtx[i]);
+		}
+	}
+	mtx_unlock(&sem_mtx);
+}
+
 /*
  * Note that the user-mode half of this passes a union, not a pointer.
  */
@@ -610,6 +680,8 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
 			error = EINVAL;
 			goto done2;
 		}
+		if ((error = cr_cansee_sem(td->td_ucred, semakptr)) != 0)
+			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 #ifdef MAC
@@ -632,6 +704,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
 	if (cmd == IPC_RMID)
 		mtx_lock(&sem_mtx);
 	mtx_lock(sema_mtxp);
+
 #ifdef MAC
 	error = mac_sysvsem_check_semctl(cred, semakptr, cmd);
 	if (error != 0)
@@ -645,41 +718,18 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
 	case IPC_RMID:
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
+		if ((error = cr_cansee_sem(td->td_ucred, semakptr)) != 0)
+			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M)))
 			goto done2;
-		semakptr->u.sem_perm.cuid = cred->cr_uid;
-		semakptr->u.sem_perm.uid = cred->cr_uid;
-		semakptr->u.sem_perm.mode = 0;
-		racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems);
-		crfree(semakptr->cred);
-		semakptr->cred = NULL;
-		SEMUNDO_LOCK();
-		semundo_clear(semidx, -1);
-		SEMUNDO_UNLOCK();
-#ifdef MAC
-		mac_sysvsem_cleanup(semakptr);
-#endif
-		wakeup(semakptr);
-		for (i = 0; i < seminfo.semmni; i++) {
-			if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
-			    sema[i].u.sem_base > semakptr->u.sem_base)
-				mtx_lock_flags(&sema_mtx[i], LOP_DUPOK);
-		}
-		for (i = semakptr->u.sem_base - sem; i < semtot; i++)
-			sem[i] = sem[i + semakptr->u.sem_nsems];
-		for (i = 0; i < seminfo.semmni; i++) {
-			if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
-			    sema[i].u.sem_base > semakptr->u.sem_base) {
-				sema[i].u.sem_base -= semakptr->u.sem_nsems;
-				mtx_unlock(&sema_mtx[i]);
-			}
-		}
-		semtot -= semakptr->u.sem_nsems;
+		sem_remove(semidx, cred);
 		break;
 
 	case IPC_SET:
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
+		if ((error = cr_cansee_sem(td->td_ucred, semakptr)) != 0)
+			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M)))
 			goto done2;
 		sbuf = arg->buf;
@@ -693,6 +743,8 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
 	case IPC_STAT:
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
+		if ((error = cr_cansee_sem(td->td_ucred, semakptr)) != 0)
+			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 		bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds));
@@ -701,6 +753,8 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
 	case GETNCNT:
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
+		if ((error = cr_cansee_sem(td->td_ucred, semakptr)) != 0)
+			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 		if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
@@ -713,6 +767,8 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
 	case GETPID:
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
+		if ((error = cr_cansee_sem(td->td_ucred, semakptr)) != 0)
+			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 		if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
@@ -725,6 +781,8 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
 	case GETVAL:
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
+		if ((error = cr_cansee_sem(td->td_ucred, semakptr)) != 0)
+			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 		if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
@@ -762,6 +820,8 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
 		KASSERT(count == semakptr->u.sem_nsems, ("nsems changed"));
+		if ((error = cr_cansee_sem(td->td_ucred, semakptr)) != 0)
+			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 		for (i = 0; i < semakptr->u.sem_nsems; i++)
@@ -774,6 +834,8 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
 	case GETZCNT:
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
+		if ((error = cr_cansee_sem(td->td_ucred, semakptr)) != 0)
+			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 		if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
@@ -786,6 +848,8 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
 	case SETVAL:
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
+		if ((error = cr_cansee_sem(td->td_ucred, semakptr)) != 0)
+			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W)))
 			goto done2;
 		if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
@@ -818,6 +882,8 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
 		if ((error = semvalid(semid, semakptr)) != 0)
 			goto done2;
 		KASSERT(count == semakptr->u.sem_nsems, ("nsems changed"));
+		if ((error = cr_cansee_sem(td->td_ucred, semakptr)) != 0)
+			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W)))
 			goto done2;
 		for (i = 0; i < semakptr->u.sem_nsems; i++) {
@@ -872,6 +938,7 @@ sys_semget(struct thread *td, struct semget_args *uap)
 	if (key != IPC_PRIVATE) {
 		for (semid = 0; semid < seminfo.semmni; semid++) {
 			if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) &&
+			    sema[semid].cred && sema[semid].cred->cr_prison == cred->cr_prison &&
 			    sema[semid].u.sem_perm.key == key)
 				break;
 		}
@@ -1049,6 +1116,8 @@ sys_semop(struct thread *td, struct semop_args *uap)
 		error = EINVAL;
 		goto done2;
 	}
+	if ((error = cr_cansee_sem(td->td_ucred, semakptr)) != 0)
+		goto done2;
 	/*
 	 * Initial pass thru sops to see what permissions are needed.
 	 * Also perform any checks that don't need repeating on each
@@ -1372,9 +1441,37 @@ semexit_myhook(void *arg, struct proc *p)
 static int
 sysctl_sema(SYSCTL_HANDLER_ARGS)
 {
+	int error;
+	struct sbuf sb;
+	struct semid_kernel tmp, empty;
+	int i;
+
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		goto done;
+	sbuf_new_for_sysctl(&sb, NULL, sizeof(struct semid_kernel) * seminfo.semmni, req);
 
-	return (SYSCTL_OUT(req, sema,
-	    sizeof(struct semid_kernel) * seminfo.semmni));
+	bzero(&empty, sizeof(empty));
+	for (i = 0; i < seminfo.semmni; i++) {
+		struct semid_kernel *semakptr;
+
+		semakptr = &sema[i];
+		if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
+		    cr_cansee_sem(req->td->td_ucred, semakptr)) {
+			semakptr = &empty;
+		} else if (req->td->td_ucred->cr_prison != semakptr->cred->cr_prison) {
+			bcopy(semakptr, &tmp, sizeof(tmp));
+			semakptr = &tmp;
+			semakptr->u.sem_perm.key = IPC_PRIVATE;
+		}
+
+		sbuf_bcat(&sb, semakptr, sizeof(*semakptr));
+	}
+	error = sbuf_finish(&sb);
+	sbuf_delete(&sb);
+
+done:
+	return (error);
 }
 
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index 66a2a43..52760cf 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -86,6 +86,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/jail.h>
+#include <sys/sbuf.h>
 
 #include <security/mac/mac_framework.h>
 
@@ -120,7 +121,7 @@ struct shmmap_state {
 };
 
 static void shm_deallocate_segment(struct shmid_kernel *);
-static int shm_find_segment_by_key(key_t);
+static int shm_find_segment_by_key(struct prison *, key_t);
 static struct shmid_kernel *shm_find_segment(int, bool);
 static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *);
 static void shmrealloc(void);
@@ -130,6 +131,9 @@ static int shmunload(void);
 static void shmexit_myhook(struct vmspace *vm);
 static void shmfork_myhook(struct proc *p1, struct proc *p2);
 static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS);
+static int cr_cansee_shm(struct ucred *, struct shmid_kernel *);
+static void shm_remove(struct shmid_kernel *, int);
+static void sysvshm_cleanup_for_prison_myhook(struct prison *);
 
 /*
  * Tuneable values.
@@ -189,12 +193,13 @@ static struct sx sysvshmsx;
 #define	SYSVSHM_ASSERT_LOCKED()	sx_assert(&sysvshmsx, SA_XLOCKED)
 
 static int
-shm_find_segment_by_key(key_t key)
+shm_find_segment_by_key(struct prison *pr, key_t key)
 {
 	int i;
 
 	for (i = 0; i < shmalloced; i++)
 		if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) &&
+		    shmsegs[i].cred && shmsegs[i].cred->cr_prison == pr &&
 		    shmsegs[i].u.shm_perm.key == key)
 			return (i);
 	return (-1);
@@ -272,6 +277,45 @@ shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
 }
 
 static int
+cr_cansee_shm(struct ucred *cred, struct shmid_kernel *shmseg)
+{
+	/* Hidden (EINVAL) when cred is unset or prison_check() rejects it. */
+	if (shmseg->cred != NULL && prison_check(cred, shmseg->cred) == 0)
+		return (0);
+	return (EINVAL);
+}
+
+static void
+shm_remove(struct shmid_kernel *shmseg, int segnum)
+{
+	/* Drop the key and flag the segment; free it now only if unattached. */
+	shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
+	shmseg->u.shm_perm.key = IPC_PRIVATE;
+	if (shmseg->u.shm_nattch > 0)
+		return;
+	shm_deallocate_segment(shmseg);
+	shm_last_free = segnum;
+}
+
+/* Mark each segment owned by prison pr as removed (freed if unattached). */
+static void
+sysvshm_cleanup_for_prison_myhook(struct prison *pr)
+{
+	struct shmid_kernel *shmseg;
+	int i;
+
+	SYSVSHM_LOCK();
+	for (i = 0; i < shmalloced; i++) {
+		shmseg = &shmsegs[i];
+		/* Guard cred like shm_find_segment_by_key() does. */
+		if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) &&
+		    shmseg->cred != NULL && shmseg->cred->cr_prison == pr)
+			shm_remove(shmseg, i);
+	}
+	SYSVSHM_UNLOCK();
+}
+
+static int
 kern_shmdt_locked(struct thread *td, const void *shmaddr)
 {
 	struct proc *p = td->td_proc;
@@ -348,6 +392,9 @@ kern_shmat_locked(struct thread *td, int shmid, const void *shmaddr,
 	shmseg = shm_find_segment(shmid, true);
 	if (shmseg == NULL)
 		return (EINVAL);
+	error = cr_cansee_shm(td->td_ucred, shmseg);
+	if (error != 0)
+		return (error);
 	error = ipcperm(td, &shmseg->u.shm_perm,
 	    (shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
 	if (error != 0)
@@ -477,6 +524,9 @@ kern_shmctl_locked(struct thread *td, int shmid, int cmd, void *buf,
 	shmseg = shm_find_segment(shmid, cmd != SHM_STAT);
 	if (shmseg == NULL)
 		return (EINVAL);
+	error = cr_cansee_shm(td->td_ucred, shmseg);
+	if (error != 0)
+		return (error);
 #ifdef MAC
 	error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, cmd);
 	if (error != 0)
@@ -512,12 +562,8 @@ kern_shmctl_locked(struct thread *td, int shmid, int cmd, void *buf,
 		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
 		if (error != 0)
 			return (error);
-		shmseg->u.shm_perm.key = IPC_PRIVATE;
-		shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
-		if (shmseg->u.shm_nattch <= 0) {
-			shm_deallocate_segment(shmseg);
-			shm_last_free = IPCID_TO_IX(shmid);
-		}
+
+		shm_remove(shmseg, IPCID_TO_IX(shmid));
 		break;
 #if 0
 	case SHM_LOCK:
@@ -727,7 +773,7 @@ sys_shmget(struct thread *td, struct shmget_args *uap)
 	if (uap->key == IPC_PRIVATE) {
 		error = shmget_allocate_segment(td, uap, mode);
 	} else {
-		segnum = shm_find_segment_by_key(uap->key);
+		segnum = shm_find_segment_by_key(td->td_ucred->cr_prison, uap->key);
 		if (segnum >= 0)
 			error = shmget_existing(td, uap, mode, segnum);
 		else if ((uap->shmflg & IPC_CREAT) == 0)
@@ -883,6 +929,7 @@ shminit(void)
 	sx_init(&sysvshmsx, "sysvshmsx");
 	shmexit_hook = &shmexit_myhook;
 	shmfork_hook = &shmfork_myhook;
+	sysvshm_cleanup_for_prison_hook = &sysvshm_cleanup_for_prison_myhook;
 
 	error = syscall_helper_register(shm_syscalls);
 	if (error != 0)
@@ -923,6 +970,7 @@ shmunload(void)
 	free(shmsegs, M_SHM);
 	shmexit_hook = NULL;
 	shmfork_hook = NULL;
+	sysvshm_cleanup_for_prison_hook = NULL;
 	sx_destroy(&sysvshmsx);
 	return (0);
 }
@@ -931,9 +979,38 @@ static int
 sysctl_shmsegs(SYSCTL_HANDLER_ARGS)
 {
 	int error;
+	struct sbuf sb;
+	struct shmid_kernel tmp, empty;
+	int i;
 
 	SYSVSHM_LOCK();
-	error = SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0]));
+
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		goto done;
+	sbuf_new_for_sysctl(&sb, NULL, shmalloced * sizeof(shmsegs[0]), req);
+
+	bzero(&empty, sizeof(empty));
+	empty.u.shm_perm.mode = SHMSEG_FREE;
+	for (i = 0; i < shmalloced; i++) {
+		struct shmid_kernel *shmseg;
+
+		shmseg = &shmsegs[i];
+		if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
+		    cr_cansee_shm(req->td->td_ucred, &shmsegs[i])) {
+			shmseg = &empty;
+		} else if (req->td->td_ucred->cr_prison != shmseg->cred->cr_prison) {
+			bcopy(shmseg, &tmp, sizeof(tmp));
+			shmseg = &tmp;
+			shmseg->u.shm_perm.key = IPC_PRIVATE;
+		}
+
+		sbuf_bcat(&sb, shmseg, sizeof(*shmseg));
+	}
+	error = sbuf_finish(&sb);
+	sbuf_delete(&sb);
+
+done:
 	SYSVSHM_UNLOCK();
 	return (error);
 }
@@ -977,6 +1054,11 @@ oshmctl(struct thread *td, struct oshmctl_args *uap)
 		SYSVSHM_UNLOCK();
 		return (EINVAL);
 	}
+	error = cr_cansee_shm(td->td_ucred, shmseg);
+	if (error != 0) {
+		SYSVSHM_UNLOCK();
+		return (error);
+	}
 	error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
 	if (error != 0) {
 		SYSVSHM_UNLOCK();
diff --git a/sys/sys/ipc.h b/sys/sys/ipc.h
index e643d48..88b5f14 100644
--- a/sys/sys/ipc.h
+++ b/sys/sys/ipc.h
@@ -126,6 +126,7 @@ struct ipc_perm {
 struct thread;
 struct proc;
 struct vmspace;
+struct prison;
 
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
@@ -133,9 +134,13 @@ void	ipcperm_old2new(struct ipc_perm_old *, struct ipc_perm *);
 void	ipcperm_new2old(struct ipc_perm *, struct ipc_perm_old *);
 #endif
 
+void ipc_cleanup_for_prison(struct prison *);
 int	ipcperm(struct thread *, struct ipc_perm *, int);
 extern void (*shmfork_hook)(struct proc *, struct proc *);
 extern void (*shmexit_hook)(struct vmspace *);
+extern void (*sysvshm_cleanup_for_prison_hook)(struct prison *);
+extern void (*sysvmsg_cleanup_for_prison_hook)(struct prison *);
+extern void (*sysvsem_cleanup_for_prison_hook)(struct prison *);
 
 #else /* ! _KERNEL */
 
_______________________________________________
freebsd-jail@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-jail
To unsubscribe, send any mail to "freebsd-jail-unsubscr...@freebsd.org"

Reply via email to