At least network subset of sysctl(8) MIBs relies on netlock or another
locks and doesn't require kernel lock. Also some integers in other
subsets can be read without kernel lock held.

Diff below actually pushes kernel lock down to net_sysctl(). It is
required for MPLS and PFLOW cases. The protocol *_sysctls() are left
under kernel lock to make the review of this diff easier. I want to
sequentially unlock them later. The rest of cases are easy to review.
net_lisnk_sysctl() only calls net_ifq_sysctl() which only returns error.
unp_sysctl() does access to integer variables, which are already
accessed in uipc_attach() without kernel lock held. pipex_sysctl() also
accesses integer variable which is lockless accessed in the
pppx(4)/pipex(4) layer.

ok?

Index: sys/kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.411
diff -u -p -r1.411 kern_sysctl.c
--- sys/kern/kern_sysctl.c      22 Jan 2023 12:05:44 -0000      1.411
+++ sys/kern/kern_sysctl.c      21 Apr 2023 13:48:13 -0000
@@ -168,7 +168,7 @@ sys_sysctl(struct proc *p, void *v, regi
                syscallarg(void *) new;
                syscallarg(size_t) newlen;
        } */ *uap = v;
-       int error, dolock = 1;
+       int error, dokernellock = 1, dolock = 1;
        size_t savelen = 0, oldlen = 0;
        sysctlfn *fn;
        int name[CTL_MAXNAME];
@@ -203,6 +203,7 @@ sys_sysctl(struct proc *p, void *v, regi
                break;
        case CTL_NET:
                fn = net_sysctl;
+               dokernellock = 0;
                break;
        case CTL_FS:
                fn = fs_sysctl;
@@ -230,19 +231,22 @@ sys_sysctl(struct proc *p, void *v, regi
        if (SCARG(uap, oldlenp) &&
            (error = copyin(SCARG(uap, oldlenp), &oldlen, sizeof(oldlen))))
                return (error);
+       if (dokernellock)
+               KERNEL_LOCK();
        if (SCARG(uap, old) != NULL) {
                if ((error = rw_enter(&sysctl_lock, RW_WRITE|RW_INTR)) != 0)
-                       return (error);
+                       goto unlock;
                if (dolock) {
                        if (atop(oldlen) > uvmexp.wiredmax - uvmexp.wired) {
                                rw_exit_write(&sysctl_lock);
-                               return (ENOMEM);
+                               error = ENOMEM;
+                               goto unlock;
                        }
                        error = uvm_vslock(p, SCARG(uap, old), oldlen,
                            PROT_READ | PROT_WRITE);
                        if (error) {
                                rw_exit_write(&sysctl_lock);
-                               return (error);
+                               goto unlock;
                        }
                }
                savelen = oldlen;
@@ -254,6 +258,9 @@ sys_sysctl(struct proc *p, void *v, regi
                        uvm_vsunlock(p, SCARG(uap, old), savelen);
                rw_exit_write(&sysctl_lock);
        }
+unlock:
+       if (dokernellock)
+               KERNEL_UNLOCK();
        if (error)
                return (error);
        if (SCARG(uap, oldlenp))
Index: sys/kern/syscalls.master
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.246
diff -u -p -r1.246 syscalls.master
--- sys/kern/syscalls.master    25 Feb 2023 09:55:46 -0000      1.246
+++ sys/kern/syscalls.master    21 Apr 2023 13:48:13 -0000
@@ -361,7 +361,7 @@
 199    OBSOL           pad_lseek
 200    OBSOL           pad_truncate
 201    OBSOL           pad_ftruncate
-202    STD             { int sys_sysctl(const int *name, u_int namelen, \
+202    STD NOLOCK      { int sys_sysctl(const int *name, u_int namelen, \
                            void *old, size_t *oldlenp, void *new, \
                            size_t newlen); }
 203    STD             { int sys_mlock(const void *addr, size_t len); }
Index: sys/kern/uipc_domain.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_domain.c,v
retrieving revision 1.60
diff -u -p -r1.60 uipc_domain.c
--- sys/kern/uipc_domain.c      14 Aug 2022 01:58:28 -0000      1.60
+++ sys/kern/uipc_domain.c      21 Apr 2023 13:48:13 -0000
@@ -183,8 +183,8 @@ net_link_sysctl(int *name, u_int namelen
 }
 
 int
-net_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen, struct proc *p)
+net_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen, struct proc *p)
 {
        const struct domain *dp;
        const struct protosw *pr;
@@ -213,9 +213,13 @@ net_sysctl(int *name, u_int namelen, voi
                    newp, newlen));
 #endif
 #if NPFLOW > 0
-       if (family == PF_PFLOW)
-               return (pflow_sysctl(name + 1, namelen - 1, oldp, oldlenp,
-                   newp, newlen));
+       if (family == PF_PFLOW) {
+               KERNEL_LOCK();
+               error = pflow_sysctl(name + 1, namelen - 1, oldp, oldlenp,
+                   newp, newlen);
+               KERNEL_UNLOCK();
+               return (error);
+       }
 #endif
 #ifdef PIPEX
        if (family == PF_PIPEX)
@@ -223,9 +227,13 @@ net_sysctl(int *name, u_int namelen, voi
                    newp, newlen));
 #endif
 #ifdef MPLS
-       if (family == PF_MPLS)
-               return (mpls_sysctl(name + 1, namelen - 1, oldp, oldlenp,
-                   newp, newlen));
+       if (family == PF_MPLS) {
+               KERNEL_LOCK();
+               error = mpls_sysctl(name + 1, namelen - 1, oldp, oldlenp,
+                   newp, newlen);
+               KERNEL_UNLOCK();
+               return (error);
+       }
 #endif
        dp = pffinddomain(family);
        if (dp == NULL)
@@ -236,8 +244,10 @@ net_sysctl(int *name, u_int namelen, voi
        protocol = name[1];
        for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
                if (pr->pr_protocol == protocol && pr->pr_sysctl) {
+                       KERNEL_LOCK();
                        error = (*pr->pr_sysctl)(name + 2, namelen - 2,
                            oldp, oldlenp, newp, newlen);
+                       KERNEL_UNLOCK();
                        return (error);
                }
        return (ENOPROTOOPT);

Reply via email to