On Thu, Apr 27, 2023 at 02:54:38PM +0200, Claudio Jeker wrote: > On Thu, Apr 27, 2023 at 01:55:33PM +0300, Vitaliy Makkoveev wrote: > > Now only direct netlock used for inet sockets protection. The unlocked > > access to all other sockets is safe, but we could lost consistency for a > > little. Since the solock() used for sockets protection, make locking > > path common and use it. Make it shared because this is read-only access > > to sockets data. For same reason use shared netlock while performing > > inpcb tables foreach walkthrough. > > This is still wrong. fill_file() is not allowed to sleep. So you can not > use a rwlock in here. fill_file is called inside a allprocess > LIST_FOREACH loop and sleeping there will blow up. > > Maybe it is enough to grab a shared NETLOCK around the LIST_FOREACH() to > ensure that we don't sleep inside. > > Please fix this properly. Right now running fstat is like playing russian > roulette. >
Except __pmap_asid_alloc() from arch/sh/sh/pmap.c of uniprocessor machine, proc_stop_sweep() is the only place where `ps_list' can't be protected by `allprocess_lock' rwlock(9). Since the refactoring is not obvious, for now I propose to use two locks for `ps_list' as we do for some other data structures like `ps_threads' or `if_list'. For the code paths where context switch is unwanted, the read only acces to `ps_list' will be protected by kernel lock, and by `allprocess_lock' rwlock(9) in all the rest. The `ps_list' list modification will require both locks to be held. Not the best solution, but it fixes sysctl_file(). Index: sys/kern/kern_exit.c =================================================================== RCS file: /cvs/src/sys/kern/kern_exit.c,v retrieving revision 1.211 diff -u -p -r1.211 kern_exit.c --- sys/kern/kern_exit.c 25 Apr 2023 18:14:06 -0000 1.211 +++ sys/kern/kern_exit.c 16 May 2023 11:14:49 -0000 @@ -223,6 +223,8 @@ exit1(struct proc *p, int xexit, int xsi p->p_fd = NULL; /* zap the thread's copy */ + rw_enter_write(&allprocess_lock); + /* * Remove proc from pidhash chain and allproc so looking * it up won't work. We will put the proc on the @@ -295,6 +297,8 @@ exit1(struct proc *p, int xexit, int xsi process_clear_orphan(qr); } } + + rw_exit_write(&allprocess_lock); /* add thread's accumulated rusage into the process's total */ ruadd(rup, &p->p_ru); Index: sys/kern/kern_fork.c =================================================================== RCS file: /cvs/src/sys/kern/kern_fork.c,v retrieving revision 1.247 diff -u -p -r1.247 kern_fork.c --- sys/kern/kern_fork.c 25 Apr 2023 18:14:06 -0000 1.247 +++ sys/kern/kern_fork.c 16 May 2023 11:14:49 -0000 @@ -62,6 +62,11 @@ #include <uvm/uvm.h> #include <machine/tcb.h> +/* + * Locks used to protect struct members in this file: + * P allprocess_lock + */ + int nprocesses = 1; /* process 0 */ int nthreads = 1; /* proc 0 */ struct forkstat forkstat; @@ -372,6 +377,8 @@ fork1(struct proc *curp, int flags, void return (ENOMEM); } + rw_enter_write(&allprocess_lock); + /* * From now on, we're committed to the fork and cannot fail. */ @@ -468,19 +475,28 @@ fork1(struct proc *curp, int flags, void knote_locked(&curpr->ps_klist, NOTE_FORK | pr->ps_pid); /* - * Update stats now that we know the fork was successful. + * Return child pid to parent process */ - uvmexp.forks++; - if (flags & FORK_PPWAIT) - uvmexp.forks_ppwait++; - if (flags & FORK_SHAREVM) - uvmexp.forks_sharevm++; + if (retval != NULL) + *retval = pr->ps_pid; /* * Pass a pointer to the new process to the caller. + * XXX but we don't return `rnewprocp' to sys_fork() + * or sys_vfork(). */ if (rnewprocp != NULL) *rnewprocp = p; + rw_exit_write(&allprocess_lock); + + /* + * Update stats now that we know the fork was successful. + */ + uvmexp.forks++; + if (flags & FORK_PPWAIT) + uvmexp.forks_ppwait++; + if (flags & FORK_SHAREVM) + uvmexp.forks_sharevm++; /* * Preserve synchronization semantics of vfork. If waiting for @@ -499,11 +515,6 @@ fork1(struct proc *curp, int flags, void if ((flags & FORK_PTRACE) && (curpr->ps_flags & PS_TRACED)) psignal(curp, SIGTRAP); - /* - * Return child pid to parent process - */ - if (retval != NULL) - *retval = pr->ps_pid; return (0); } @@ -610,7 +621,7 @@ alloctid(void) /* * Checks for current use of a pid, either as a pid or pgid. */ -pid_t oldpids[128]; +pid_t oldpids[128]; /* [P] */ int ispidtaken(pid_t pid) { Index: sys/kern/kern_ktrace.c =================================================================== RCS file: /cvs/src/sys/kern/kern_ktrace.c,v retrieving revision 1.112 diff -u -p -r1.112 kern_ktrace.c --- sys/kern/kern_ktrace.c 11 May 2023 09:51:33 -0000 1.112 +++ sys/kern/kern_ktrace.c 16 May 2023 11:14:49 -0000 @@ -434,6 +434,7 @@ doktrace(struct vnode *vp, int ops, int * Clear all uses of the tracefile */ if (ops == KTROP_CLEARFILE) { + rw_enter_read(&allprocess_lock); LIST_FOREACH(pr, &allprocess, ps_list) { if (pr->ps_tracevp == vp) { if (ktrcanset(p, pr)) @@ -442,6 +443,7 @@ doktrace(struct vnode *vp, int ops, int error = EPERM; } } + rw_exit_read(&allprocess_lock); goto done; } /* @@ -674,12 +676,14 @@ bad: */ log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", error); + rw_enter_read(&allprocess_lock); LIST_FOREACH(pr, &allprocess, ps_list) { if (pr == curp->p_p) continue; if (pr->ps_tracevp == vp && pr->ps_tracecred == cred) ktrcleartrace(pr); } + rw_exit_read(&allprocess_lock); ktrcleartrace(curp->p_p); return (error); } Index: sys/kern/kern_proc.c =================================================================== RCS file: /cvs/src/sys/kern/kern_proc.c,v retrieving revision 1.94 diff -u -p -r1.94 kern_proc.c --- sys/kern/kern_proc.c 2 Jan 2023 23:09:48 -0000 1.94 +++ sys/kern/kern_proc.c 16 May 2023 11:14:49 -0000 @@ -46,6 +46,8 @@ /* * Locks used to protect struct members in this file: * I immutable after creation + * K kernel lock + * P allprocess_lock * U uidinfolk */ @@ -54,6 +56,8 @@ struct rwlock uidinfolk; LIST_HEAD(uihashhead, uidinfo) *uihashtbl; /* [U] */ u_long uihash; /* [I] size of hash table - 1 */ +struct rwlock allprocess_lock; + /* * Other process lists */ @@ -63,7 +67,7 @@ struct pidhashhead *pidhashtbl; u_long pidhash; struct pgrphashhead *pgrphashtbl; u_long pgrphash; -struct processlist allprocess; +struct processlist allprocess; /* [K|P] */ struct processlist zombprocess; struct proclist allproc; @@ -92,6 +96,7 @@ procinit(void) LIST_INIT(&zombprocess); LIST_INIT(&allproc); + rw_init(&allprocess_lock, "allpslk"); rw_init(&uidinfolk, "uidinfo"); tidhashtbl = hashinit(maxthread / 4, M_PROC, M_NOWAIT, &tidhash); Index: sys/kern/kern_resource.c =================================================================== RCS file: /cvs/src/sys/kern/kern_resource.c,v retrieving revision 1.77 diff -u -p -r1.77 kern_resource.c --- sys/kern/kern_resource.c 4 Feb 2023 19:33:03 -0000 1.77 +++ sys/kern/kern_resource.c 16 May 2023 11:14:49 -0000 @@ -122,10 +122,12 @@ sys_getpriority(struct proc *curp, void case PRIO_USER: if (SCARG(uap, who) == 0) SCARG(uap, who) = curp->p_ucred->cr_uid; + rw_enter_read(&allprocess_lock); LIST_FOREACH(pr, &allprocess, ps_list) if (pr->ps_ucred->cr_uid == SCARG(uap, who) && pr->ps_nice < low) low = pr->ps_nice; + rw_exit_read(&allprocess_lock); break; default: @@ -178,11 +180,13 @@ sys_setpriority(struct proc *curp, void case PRIO_USER: if (SCARG(uap, who) == 0) SCARG(uap, who) = curp->p_ucred->cr_uid; + rw_enter_read(&allprocess_lock); LIST_FOREACH(pr, &allprocess, ps_list) if (pr->ps_ucred->cr_uid == SCARG(uap, who)) { error = donice(curp, pr, SCARG(uap, prio)); found = 1; } + rw_exit_read(&allprocess_lock); break; default: Index: sys/kern/kern_sig.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sig.c,v retrieving revision 1.306 diff -u -p -r1.306 kern_sig.c --- sys/kern/kern_sig.c 3 Apr 2023 11:57:50 -0000 1.306 +++ sys/kern/kern_sig.c 16 May 2023 11:14:49 -0000 @@ -670,6 +670,7 @@ killpg1(struct proc *cp, int signum, int /* * broadcast */ + rw_enter_read(&allprocess_lock); LIST_FOREACH(pr, &allprocess, ps_list) { if (pr->ps_pid <= 1 || pr->ps_flags & (PS_SYSTEM | PS_NOBROADCASTKILL) || @@ -679,6 +680,7 @@ killpg1(struct proc *cp, int signum, int if (signum) prsignal(pr, signum); } + rw_exit_read(&allprocess_lock); } else { if (pgid == 0) /* Index: sys/kern/kern_sysctl.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sysctl.c,v retrieving revision 1.412 diff -u -p -r1.412 kern_sysctl.c --- sys/kern/kern_sysctl.c 4 May 2023 09:40:36 -0000 1.412 +++ sys/kern/kern_sysctl.c 16 May 2023 11:14:49 -0000 @@ -1426,6 +1426,7 @@ sysctl_file(int *name, u_int namelen, ch break; } matched = 0; + rw_enter_read(&allprocess_lock); LIST_FOREACH(pr, &allprocess, ps_list) { /* * skip system, exiting, embryonic and undead @@ -1454,10 +1455,12 @@ sysctl_file(int *name, u_int namelen, ch FRELE(fp, p); } } + rw_exit_read(&allprocess_lock); if (!matched) error = ESRCH; break; case KERN_FILE_BYUID: + rw_enter_read(&allprocess_lock); LIST_FOREACH(pr, &allprocess, ps_list) { /* * skip system, exiting, embryonic and undead @@ -1483,6 +1486,7 @@ sysctl_file(int *name, u_int namelen, ch FRELE(fp, p); } } + rw_exit_read(&allprocess_lock); break; default: error = EINVAL; @@ -1538,8 +1542,9 @@ sysctl_doproc(int *name, u_int namelen, if (where != NULL) kproc = malloc(sizeof(*kproc), M_TEMP, M_WAITOK); - pr = LIST_FIRST(&allprocess); doingzomb = 0; + rw_enter_read(&allprocess_lock); + pr = LIST_FIRST(&allprocess); again: for (; pr != NULL; pr = LIST_NEXT(pr, ps_list)) { /* XXX skip processes in the middle of being zapped */ @@ -1602,6 +1607,7 @@ again: break; default: + rw_exit_read(&allprocess_lock); error = EINVAL; goto err; } @@ -1609,8 +1615,10 @@ again: if (buflen >= elem_size && elem_count > 0) { fill_kproc(pr, kproc, NULL, show_pointers); error = copyout(kproc, dp, elem_size); - if (error) + if (error) { + rw_exit_read(&allprocess_lock); goto err; + } dp += elem_size; buflen -= elem_size; elem_count--; @@ -1625,8 +1633,10 @@ again: if (buflen >= elem_size && elem_count > 0) { fill_kproc(pr, kproc, p, show_pointers); error = copyout(kproc, dp, elem_size); - if (error) + if (error) { + rw_exit_read(&allprocess_lock); goto err; + } dp += elem_size; buflen -= elem_size; elem_count--; @@ -1639,6 +1649,8 @@ again: doingzomb++; goto again; } + rw_exit_read(&allprocess_lock); + if (where != NULL) { *sizep = dp - where; if (needed > *sizep) { Index: sys/kern/kern_unveil.c =================================================================== RCS file: /cvs/src/sys/kern/kern_unveil.c,v retrieving revision 1.55 diff -u -p -r1.55 kern_unveil.c --- sys/kern/kern_unveil.c 5 Dec 2022 23:18:37 -0000 1.55 +++ sys/kern/kern_unveil.c 16 May 2023 11:14:49 -0000 @@ -823,6 +823,7 @@ unveil_removevnode(struct vnode *vp) vref(vp); /* make sure it is held till we are done */ + rw_assert_rdlock(&allprocess_lock); LIST_FOREACH(pr, &allprocess, ps_list) { struct unveil * uv; Index: sys/kern/vfs_syscalls.c =================================================================== RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v retrieving revision 1.361 diff -u -p -r1.361 vfs_syscalls.c --- sys/kern/vfs_syscalls.c 11 Feb 2023 23:22:17 -0000 1.361 +++ sys/kern/vfs_syscalls.c 16 May 2023 11:14:49 -0000 @@ -325,6 +325,7 @@ checkdirs(struct vnode *olddp) return; if (VFS_ROOT(olddp->v_mountedhere, &newdp)) panic("mount: lost mount"); + rw_enter_read(&allprocess_lock); LIST_FOREACH(pr, &allprocess, ps_list) { fdp = pr->ps_fd; if (fdp->fd_cdir == olddp) { @@ -338,6 +339,7 @@ checkdirs(struct vnode *olddp) fdp->fd_rdir = newdp; } } + rw_exit_read(&allprocess_lock); if (rootvnode == olddp) { free_count++; vref(newdp); @@ -483,12 +485,20 @@ dounmount_leaf(struct mount *mp, int fla } /* + * Protect `allprocess' list access within + * unveil_removevnode(). Do this here to avoid + * context switch within `mnt_vnodelist' foreach + * loop. + */ + rw_enter_read(&allprocess_lock); + /* * Before calling file system unmount, make sure * all unveils to vnodes in here are dropped. */ TAILQ_FOREACH_SAFE(vp , &mp->mnt_vnodelist, v_mntvnodes, nvp) { unveil_removevnode(vp); } + rw_exit_read(&allprocess_lock); if (((mp->mnt_flag & MNT_RDONLY) || (error = VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p)) == 0) || Index: sys/sys/proc.h =================================================================== RCS file: /cvs/src/sys/sys/proc.h,v retrieving revision 1.340 diff -u -p -r1.340 proc.h --- sys/sys/proc.h 25 Apr 2023 18:14:06 -0000 1.340 +++ sys/sys/proc.h 16 May 2023 11:14:49 -0000 @@ -124,6 +124,7 @@ struct unveil; * K kernel lock * m this process' `ps_mtx' * p this process' `ps_lock' + * P allprocess_lock * R rlimit_lock * S scheduler lock * T itimer_mtx @@ -137,7 +138,7 @@ struct process { struct proc *ps_mainproc; struct ucred *ps_ucred; /* Process owner's identity. */ - LIST_ENTRY(process) ps_list; /* List of all processes. */ + LIST_ENTRY(process) ps_list; /* [K|P] List of all processes. */ TAILQ_HEAD(,proc) ps_threads; /* [K|S] Threads in this process. */ LIST_ENTRY(process) ps_pglist; /* List of processes in pgrp. */ @@ -502,6 +503,8 @@ extern struct proc proc0; /* Process sl extern struct process process0; /* Process slot for kernel threads. */ extern int nprocesses, maxprocess; /* Cur and max number of processes. */ extern int nthreads, maxthread; /* Cur and max number of threads. */ + +extern struct rwlock allprocess_lock; LIST_HEAD(proclist, proc); LIST_HEAD(processlist, process);