On Thu, Apr 27, 2023 at 02:54:38PM +0200, Claudio Jeker wrote:
> On Thu, Apr 27, 2023 at 01:55:33PM +0300, Vitaliy Makkoveev wrote:
> > Now only direct netlock used for inet sockets protection. The unlocked
> > access to all other sockets is safe, but we could lost consistency for a
> > little. Since the solock() used for sockets protection, make locking
> > path common and use it. Make it shared because this is read-only access
> > to sockets data. For same reason use shared netlock while performing
> > inpcb tables foreach walkthrough.
> 
> This is still wrong. fill_file() is not allowed to sleep. So you can not
> use a rwlock in here. fill_file is called inside a allprocess
> LIST_FOREACH loop and sleeping there will blow up.
>  
> Maybe it is enough to grab a shared NETLOCK around the LIST_FOREACH() to
> ensure that we don't sleep inside.
> 
> Please fix this properly. Right now running fstat is like playing russian
> roulette.
> 

Except __pmap_asid_alloc() from arch/sh/sh/pmap.c of uniprocessor
machine, proc_stop_sweep() is the only place where `ps_list' can't be
protected by `allprocess_lock' rwlock(9). Since the refactoring is not
obvious, for now  I propose to use two locks for `ps_list' as we do for
some other data structures like `ps_threads' or `if_list'. For the code
paths where context switch is unwanted, the read only acces to `ps_list'
will be protected by kernel lock, and by `allprocess_lock' rwlock(9) in
all the rest. The `ps_list' list modification will require both locks to
be held. Not the best solution, but it fixes sysctl_file().

Index: sys/kern/kern_exit.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_exit.c,v
retrieving revision 1.211
diff -u -p -r1.211 kern_exit.c
--- sys/kern/kern_exit.c        25 Apr 2023 18:14:06 -0000      1.211
+++ sys/kern/kern_exit.c        16 May 2023 11:14:49 -0000
@@ -223,6 +223,8 @@ exit1(struct proc *p, int xexit, int xsi
 
        p->p_fd = NULL;         /* zap the thread's copy */
 
+       rw_enter_write(&allprocess_lock);
+
         /*
         * Remove proc from pidhash chain and allproc so looking
         * it up won't work.  We will put the proc on the
@@ -295,6 +297,8 @@ exit1(struct proc *p, int xexit, int xsi
                        process_clear_orphan(qr);
                }
        }
+
+       rw_exit_write(&allprocess_lock);
 
        /* add thread's accumulated rusage into the process's total */
        ruadd(rup, &p->p_ru);
Index: sys/kern/kern_fork.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.247
diff -u -p -r1.247 kern_fork.c
--- sys/kern/kern_fork.c        25 Apr 2023 18:14:06 -0000      1.247
+++ sys/kern/kern_fork.c        16 May 2023 11:14:49 -0000
@@ -62,6 +62,11 @@
 #include <uvm/uvm.h>
 #include <machine/tcb.h>
 
+/*
+ * Locks used to protect struct members in this file:
+ *     P       allprocess_lock
+ */
+
 int    nprocesses = 1;         /* process 0 */
 int    nthreads = 1;           /* proc 0 */
 struct forkstat forkstat;
@@ -372,6 +377,8 @@ fork1(struct proc *curp, int flags, void
                return (ENOMEM);
        }
 
+       rw_enter_write(&allprocess_lock);
+
        /*
         * From now on, we're committed to the fork and cannot fail.
         */
@@ -468,19 +475,28 @@ fork1(struct proc *curp, int flags, void
        knote_locked(&curpr->ps_klist, NOTE_FORK | pr->ps_pid);
 
        /*
-        * Update stats now that we know the fork was successful.
+        * Return child pid to parent process
         */
-       uvmexp.forks++;
-       if (flags & FORK_PPWAIT)
-               uvmexp.forks_ppwait++;
-       if (flags & FORK_SHAREVM)
-               uvmexp.forks_sharevm++;
+       if (retval != NULL)
+               *retval = pr->ps_pid;
 
        /*
         * Pass a pointer to the new process to the caller.
+        * XXX but we don't return `rnewprocp' to sys_fork()
+        * or sys_vfork().
         */
        if (rnewprocp != NULL)
                *rnewprocp = p;
+       rw_exit_write(&allprocess_lock);
+
+       /*
+        * Update stats now that we know the fork was successful.
+        */
+       uvmexp.forks++;
+       if (flags & FORK_PPWAIT)
+               uvmexp.forks_ppwait++;
+       if (flags & FORK_SHAREVM)
+               uvmexp.forks_sharevm++;
 
        /*
         * Preserve synchronization semantics of vfork.  If waiting for
@@ -499,11 +515,6 @@ fork1(struct proc *curp, int flags, void
        if ((flags & FORK_PTRACE) && (curpr->ps_flags & PS_TRACED))
                psignal(curp, SIGTRAP);
 
-       /*
-        * Return child pid to parent process
-        */
-       if (retval != NULL)
-               *retval = pr->ps_pid;
        return (0);
 }
 
@@ -610,7 +621,7 @@ alloctid(void)
 /*
  * Checks for current use of a pid, either as a pid or pgid.
  */
-pid_t oldpids[128];
+pid_t oldpids[128];    /* [P] */
 int
 ispidtaken(pid_t pid)
 {
Index: sys/kern/kern_ktrace.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_ktrace.c,v
retrieving revision 1.112
diff -u -p -r1.112 kern_ktrace.c
--- sys/kern/kern_ktrace.c      11 May 2023 09:51:33 -0000      1.112
+++ sys/kern/kern_ktrace.c      16 May 2023 11:14:49 -0000
@@ -434,6 +434,7 @@ doktrace(struct vnode *vp, int ops, int 
         * Clear all uses of the tracefile
         */
        if (ops == KTROP_CLEARFILE) {
+               rw_enter_read(&allprocess_lock);
                LIST_FOREACH(pr, &allprocess, ps_list) {
                        if (pr->ps_tracevp == vp) {
                                if (ktrcanset(p, pr))
@@ -442,6 +443,7 @@ doktrace(struct vnode *vp, int ops, int 
                                        error = EPERM;
                        }
                }
+               rw_exit_read(&allprocess_lock);
                goto done;
        }
        /*
@@ -674,12 +676,14 @@ bad:
         */
        log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
            error);
+       rw_enter_read(&allprocess_lock);
        LIST_FOREACH(pr, &allprocess, ps_list) {
                if (pr == curp->p_p)
                        continue;
                if (pr->ps_tracevp == vp && pr->ps_tracecred == cred)
                        ktrcleartrace(pr);
        }
+       rw_exit_read(&allprocess_lock);
        ktrcleartrace(curp->p_p);
        return (error);
 }
Index: sys/kern/kern_proc.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_proc.c,v
retrieving revision 1.94
diff -u -p -r1.94 kern_proc.c
--- sys/kern/kern_proc.c        2 Jan 2023 23:09:48 -0000       1.94
+++ sys/kern/kern_proc.c        16 May 2023 11:14:49 -0000
@@ -46,6 +46,8 @@
 /*
  *  Locks used to protect struct members in this file:
  *     I       immutable after creation
+ *     K       kernel lock
+ *     P       allprocess_lock
  *     U       uidinfolk
  */
 
@@ -54,6 +56,8 @@ struct rwlock uidinfolk;
 LIST_HEAD(uihashhead, uidinfo) *uihashtbl;     /* [U] */
 u_long uihash;                         /* [I] size of hash table - 1 */
 
+struct rwlock allprocess_lock;
+
 /*
  * Other process lists
  */
@@ -63,7 +67,7 @@ struct pidhashhead *pidhashtbl;
 u_long pidhash;
 struct pgrphashhead *pgrphashtbl;
 u_long pgrphash;
-struct processlist allprocess;
+struct processlist allprocess;         /* [K|P] */
 struct processlist zombprocess;
 struct proclist allproc;
 
@@ -92,6 +96,7 @@ procinit(void)
        LIST_INIT(&zombprocess);
        LIST_INIT(&allproc);
 
+       rw_init(&allprocess_lock, "allpslk");
        rw_init(&uidinfolk, "uidinfo");
 
        tidhashtbl = hashinit(maxthread / 4, M_PROC, M_NOWAIT, &tidhash);
Index: sys/kern/kern_resource.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_resource.c,v
retrieving revision 1.77
diff -u -p -r1.77 kern_resource.c
--- sys/kern/kern_resource.c    4 Feb 2023 19:33:03 -0000       1.77
+++ sys/kern/kern_resource.c    16 May 2023 11:14:49 -0000
@@ -122,10 +122,12 @@ sys_getpriority(struct proc *curp, void 
        case PRIO_USER:
                if (SCARG(uap, who) == 0)
                        SCARG(uap, who) = curp->p_ucred->cr_uid;
+               rw_enter_read(&allprocess_lock);
                LIST_FOREACH(pr, &allprocess, ps_list)
                        if (pr->ps_ucred->cr_uid == SCARG(uap, who) &&
                            pr->ps_nice < low)
                                low = pr->ps_nice;
+               rw_exit_read(&allprocess_lock);
                break;
 
        default:
@@ -178,11 +180,13 @@ sys_setpriority(struct proc *curp, void 
        case PRIO_USER:
                if (SCARG(uap, who) == 0)
                        SCARG(uap, who) = curp->p_ucred->cr_uid;
+               rw_enter_read(&allprocess_lock);
                LIST_FOREACH(pr, &allprocess, ps_list)
                        if (pr->ps_ucred->cr_uid == SCARG(uap, who)) {
                                error = donice(curp, pr, SCARG(uap, prio));
                                found = 1;
                        }
+               rw_exit_read(&allprocess_lock);
                break;
 
        default:
Index: sys/kern/kern_sig.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.306
diff -u -p -r1.306 kern_sig.c
--- sys/kern/kern_sig.c 3 Apr 2023 11:57:50 -0000       1.306
+++ sys/kern/kern_sig.c 16 May 2023 11:14:49 -0000
@@ -670,6 +670,7 @@ killpg1(struct proc *cp, int signum, int
                /* 
                 * broadcast
                 */
+               rw_enter_read(&allprocess_lock);
                LIST_FOREACH(pr, &allprocess, ps_list) {
                        if (pr->ps_pid <= 1 ||
                            pr->ps_flags & (PS_SYSTEM | PS_NOBROADCASTKILL) ||
@@ -679,6 +680,7 @@ killpg1(struct proc *cp, int signum, int
                        if (signum)
                                prsignal(pr, signum);
                }
+               rw_exit_read(&allprocess_lock);
        } else {
                if (pgid == 0)
                        /*
Index: sys/kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.412
diff -u -p -r1.412 kern_sysctl.c
--- sys/kern/kern_sysctl.c      4 May 2023 09:40:36 -0000       1.412
+++ sys/kern/kern_sysctl.c      16 May 2023 11:14:49 -0000
@@ -1426,6 +1426,7 @@ sysctl_file(int *name, u_int namelen, ch
                        break;
                }
                matched = 0;
+               rw_enter_read(&allprocess_lock);
                LIST_FOREACH(pr, &allprocess, ps_list) {
                        /*
                         * skip system, exiting, embryonic and undead
@@ -1454,10 +1455,12 @@ sysctl_file(int *name, u_int namelen, ch
                                FRELE(fp, p);
                        }
                }
+               rw_exit_read(&allprocess_lock);
                if (!matched)
                        error = ESRCH;
                break;
        case KERN_FILE_BYUID:
+               rw_enter_read(&allprocess_lock);
                LIST_FOREACH(pr, &allprocess, ps_list) {
                        /*
                         * skip system, exiting, embryonic and undead
@@ -1483,6 +1486,7 @@ sysctl_file(int *name, u_int namelen, ch
                                FRELE(fp, p);
                        }
                }
+               rw_exit_read(&allprocess_lock);
                break;
        default:
                error = EINVAL;
@@ -1538,8 +1542,9 @@ sysctl_doproc(int *name, u_int namelen, 
        if (where != NULL)
                kproc = malloc(sizeof(*kproc), M_TEMP, M_WAITOK);
 
-       pr = LIST_FIRST(&allprocess);
        doingzomb = 0;
+       rw_enter_read(&allprocess_lock);
+       pr = LIST_FIRST(&allprocess);
 again:
        for (; pr != NULL; pr = LIST_NEXT(pr, ps_list)) {
                /* XXX skip processes in the middle of being zapped */
@@ -1602,6 +1607,7 @@ again:
                        break;
 
                default:
+                       rw_exit_read(&allprocess_lock);
                        error = EINVAL;
                        goto err;
                }
@@ -1609,8 +1615,10 @@ again:
                if (buflen >= elem_size && elem_count > 0) {
                        fill_kproc(pr, kproc, NULL, show_pointers);
                        error = copyout(kproc, dp, elem_size);
-                       if (error)
+                       if (error) {
+                               rw_exit_read(&allprocess_lock);
                                goto err;
+                       }
                        dp += elem_size;
                        buflen -= elem_size;
                        elem_count--;
@@ -1625,8 +1633,10 @@ again:
                        if (buflen >= elem_size && elem_count > 0) {
                                fill_kproc(pr, kproc, p, show_pointers);
                                error = copyout(kproc, dp, elem_size);
-                               if (error)
+                               if (error) {
+                                       rw_exit_read(&allprocess_lock);
                                        goto err;
+                               }
                                dp += elem_size;
                                buflen -= elem_size;
                                elem_count--;
@@ -1639,6 +1649,8 @@ again:
                doingzomb++;
                goto again;
        }
+       rw_exit_read(&allprocess_lock);
+
        if (where != NULL) {
                *sizep = dp - where;
                if (needed > *sizep) {
Index: sys/kern/kern_unveil.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_unveil.c,v
retrieving revision 1.55
diff -u -p -r1.55 kern_unveil.c
--- sys/kern/kern_unveil.c      5 Dec 2022 23:18:37 -0000       1.55
+++ sys/kern/kern_unveil.c      16 May 2023 11:14:49 -0000
@@ -823,6 +823,7 @@ unveil_removevnode(struct vnode *vp)
 
        vref(vp); /* make sure it is held till we are done */
 
+       rw_assert_rdlock(&allprocess_lock);
        LIST_FOREACH(pr, &allprocess, ps_list) {
                struct unveil * uv;
 
Index: sys/kern/vfs_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.361
diff -u -p -r1.361 vfs_syscalls.c
--- sys/kern/vfs_syscalls.c     11 Feb 2023 23:22:17 -0000      1.361
+++ sys/kern/vfs_syscalls.c     16 May 2023 11:14:49 -0000
@@ -325,6 +325,7 @@ checkdirs(struct vnode *olddp)
                return;
        if (VFS_ROOT(olddp->v_mountedhere, &newdp))
                panic("mount: lost mount");
+       rw_enter_read(&allprocess_lock);
        LIST_FOREACH(pr, &allprocess, ps_list) {
                fdp = pr->ps_fd;
                if (fdp->fd_cdir == olddp) {
@@ -338,6 +339,7 @@ checkdirs(struct vnode *olddp)
                        fdp->fd_rdir = newdp;
                }
        }
+       rw_exit_read(&allprocess_lock);
        if (rootvnode == olddp) {
                free_count++;
                vref(newdp);
@@ -483,12 +485,20 @@ dounmount_leaf(struct mount *mp, int fla
        }
 
        /*
+        * Protect `allprocess' list access within
+        * unveil_removevnode(). Do this here to avoid
+        * context switch within `mnt_vnodelist' foreach
+        * loop.
+        */
+       rw_enter_read(&allprocess_lock);
+       /*
         * Before calling file system unmount, make sure
         * all unveils to vnodes in here are dropped.
         */
        TAILQ_FOREACH_SAFE(vp , &mp->mnt_vnodelist, v_mntvnodes, nvp) {
                unveil_removevnode(vp);
        }
+       rw_exit_read(&allprocess_lock);
 
        if (((mp->mnt_flag & MNT_RDONLY) ||
            (error = VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p)) == 0) ||
Index: sys/sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.340
diff -u -p -r1.340 proc.h
--- sys/sys/proc.h      25 Apr 2023 18:14:06 -0000      1.340
+++ sys/sys/proc.h      16 May 2023 11:14:49 -0000
@@ -124,6 +124,7 @@ struct unveil;
  *     K       kernel lock
  *     m       this process' `ps_mtx'
  *     p       this process' `ps_lock'
+ *     P       allprocess_lock
  *     R       rlimit_lock
  *     S       scheduler lock
  *     T       itimer_mtx
@@ -137,7 +138,7 @@ struct process {
        struct  proc *ps_mainproc;
        struct  ucred *ps_ucred;        /* Process owner's identity. */
 
-       LIST_ENTRY(process) ps_list;    /* List of all processes. */
+       LIST_ENTRY(process) ps_list;    /* [K|P] List of all processes. */
        TAILQ_HEAD(,proc) ps_threads;   /* [K|S] Threads in this process. */
 
        LIST_ENTRY(process) ps_pglist;  /* List of processes in pgrp. */
@@ -502,6 +503,8 @@ extern struct proc proc0;           /* Process sl
 extern struct process process0;                /* Process slot for kernel 
threads. */
 extern int nprocesses, maxprocess;     /* Cur and max number of processes. */
 extern int nthreads, maxthread;                /* Cur and max number of 
threads. */
+
+extern struct rwlock allprocess_lock;
 
 LIST_HEAD(proclist, proc);
 LIST_HEAD(processlist, process);

Reply via email to