On Mon, Dec 06, 2021 at 08:35:15PM +0100, Mark Kettenis wrote:
> > Date: Sun, 5 Dec 2021 18:01:04 -0600
> > From: Scott Cheloha <[email protected]>
> >
> > Two things in sys_kbind() need an explicit kernel lock. First,
> > sigexit(). Second, uvm_map_extract(), because the following call
> > chain panics without it:
> >
> > [...]
> >
> > With this committed we can unlock kbind(2).
> >
> > Thoughts? ok?
>
> To be honest, I don't think this makes sense unless you can make the
> "normal" code path lock free. You're replacing a single
> KERNEL_LOCK/UNLOCK pair with (potentially) a bunch of them. That may
> actually make things worse. So I think we need to make
> uvm_map_extract() mpsafe first.
Unlocking uvm_map_extract() would improve things, yes.
I wrote a benchmark so we have something concrete to talk about.
Attached inline at the end. The benchmark spawns 1 or more threads
that repeatedly fork(2), execve(2), and waitpid(2). By default the
child executes /usr/bin/true, the simplest program in base that needs
ld.so. The benchmark stops when a given number of fork/execve/waitpid
cycles have completed. It then reports the elapsed time.
With this patch + kbind(2) unlocked the benchmark completes no faster
in any configuration.
> In case mpi@ disagrees, the unbalanced KERNEL_LOCK() before the
> sigexit() call is unfortunate. I'd add a /* NOTREACHED */ after the
> sigexit() call to signal that the unbalanced KERNEL_LOCK() is ok.
Thanks, wasn't sure about that. Updated.
--
Here's the benchmark. Try something like this:
# How quickly can four threads fork+exec true(1) a
# thousand times?
$ ./forkexecwait -n4 1000 /usr/bin/true
/*
* cc -o forkexecwait forkexecwait.c -lpthread
*/
#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/* Worker threads and the barrier that releases them simultaneously. */
pthread_t *thread;
pthread_barrier_t barrier;
/* Command to exec in each child; overridden by trailing argv in main(). */
char *default_argv[] = { "true", NULL };
char *default_envp[] = { NULL };
char *exec_path = "/usr/bin/true";
char **exec_argv = default_argv;
/* start is set by the barrier's serial thread, stop by whichever thread
 * completes the max'th cycle; both are read by main() after the join. */
struct timespec start, stop;
/* count is advanced with atomic_inc_long_nv(); max is set once in main(). */
unsigned long count, max;
/* Set (under the atomic count election) to tell all workers to quit. */
volatile int done;
void *thread_func(void *);
void thread_join(pthread_t *, unsigned int);
void thread_spawn(pthread_t **, unsigned int, void *(*)(void *));
void usage(void);
/*
 * Parse the command line, run nthreads benchmark workers until they
 * have completed the requested number of fork/execve/waitpid cycles,
 * then print the elapsed wall-clock time in seconds.nanoseconds.
 */
int
main(int argc, char *argv[])
{
	struct timespec elapsed;
	const char *errstr;
	unsigned int nthreads = 1;
	int ch, error;

	for (;;) {
		ch = getopt(argc, argv, "n:");
		if (ch == -1)
			break;
		if (ch == 'n') {
			nthreads = strtonum(optarg, 1, UINT_MAX, &errstr);
			if (errstr != NULL)
				errx(1, "nthreads is %s: %s", errstr, optarg);
		} else
			usage();
	}
	argc -= optind;
	argv += optind;
	if (argc < 1)
		usage();

	max = strtonum(argv[0], 1, LONG_MAX, &errstr);
	if (errstr != NULL)
		errx(1, "count is %s: %s", errstr, argv[0]);
	/* A trailing command (with its args) replaces /usr/bin/true. */
	if (argc >= 2) {
		exec_path = argv[1];
		exec_argv = &argv[1];
	}

	error = pthread_barrier_init(&barrier, NULL, nthreads);
	if (error)
		errc(1, error, "pthread_barrier_init");
	thread_spawn(&thread, nthreads, &thread_func);
	thread_join(thread, nthreads);
	error = pthread_barrier_destroy(&barrier);
	if (error)
		errc(1, error, "pthread_barrier_destroy");

	/* start/stop were filled in by the workers; see thread_func(). */
	timespecsub(&stop, &start, &elapsed);
	printf("%lld.%09ld\n", (long long)elapsed.tv_sec, elapsed.tv_nsec);
	return 0;
}
/*
 * Benchmark worker.  All workers rendezvous at the barrier; the one
 * serial thread records the start time.  Each worker then loops:
 * fork, execve the target in the child, reap the child in the parent.
 * Whichever worker completes the max'th cycle (elected by the atomic
 * increment) records the stop time and raises the done flag.
 */
void *
thread_func(void *arg)
{
	pid_t child;
	int error, status;

	error = pthread_barrier_wait(&barrier);
	if (error != 0) {
		if (error != PTHREAD_BARRIER_SERIAL_THREAD)
			errc(1, error, "pthread_barrier_wait");
		clock_gettime(CLOCK_MONOTONIC, &start);
	}

	while (!done) {
		child = fork();
		if (child == -1)
			err(1, "fork");
		if (child == 0) {
			execve(exec_path, exec_argv, default_envp);
			/* execve failed: exit 127 if missing, 126 otherwise. */
			error = (errno == ENOENT) ? 127 : 126;
			err(error, "execve %s", exec_path);
		}
		while (waitpid(child, &status, 0) == -1) {
			if (errno != EINTR)
				err(1, "waitpid %ld", (long)child);
		}
		if (atomic_inc_long_nv(&count) == max) {
			clock_gettime(CLOCK_MONOTONIC, &stop);
			done = 1;
		}
	}
	return NULL;
}
/*
 * Wait for every one of the nthreads workers in the array to exit.
 * Exits the program on any join failure.
 */
void
thread_join(pthread_t *thread, unsigned int nthreads)
{
	unsigned int n;
	int error;

	for (n = 0; n < nthreads; n++) {
		error = pthread_join(thread[n], NULL);
		if (error != 0)
			errc(1, error, "pthread_join");
	}
}
/*
 * Allocate an array of nthreads handles in *threadp and start each
 * one running func.  Exits the program on allocation or creation
 * failure; on return all threads are running.
 */
void
thread_spawn(pthread_t **threadp, unsigned int nthreads, void *(*func)(void *))
{
	pthread_t *t;
	unsigned int n;
	int error;

	t = calloc(nthreads, sizeof(*t));
	if (t == NULL)
		err(1, NULL);
	*threadp = t;
	for (n = 0; n < nthreads; n++) {
		error = pthread_create(&t[n], NULL, func, NULL);
		if (error != 0)
			errc(1, error, "pthread_create");
	}
}
/*
 * Print a usage synopsis to stderr and exit nonzero.
 */
void
usage(void)
{
	const char *fmt = "usage: %s [-n nthreads] count [command [arg ...]]\n";

	fprintf(stderr, fmt, getprogname());
	exit(1);
}
--
And here's the updated patch.
Index: uvm_mmap.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_mmap.c,v
retrieving revision 1.165
diff -u -p -r1.165 uvm_mmap.c
--- uvm_mmap.c 5 Dec 2021 22:00:42 -0000 1.165
+++ uvm_mmap.c 6 Dec 2021 20:24:42 -0000
@@ -1112,10 +1112,13 @@ sys_kbind(struct proc *p, void *v, regis
if (pr->ps_kbind_addr == 0) {
pr->ps_kbind_addr = pc;
pr->ps_kbind_cookie = SCARG(uap, proc_cookie);
- } else if (pc != pr->ps_kbind_addr || pc == BOGO_PC)
- sigexit(p, SIGILL);
- else if (pr->ps_kbind_cookie != SCARG(uap, proc_cookie))
+ } else if (pc != pr->ps_kbind_addr || pc == BOGO_PC ||
+ pr->ps_kbind_cookie != SCARG(uap, proc_cookie)) {
+ KERNEL_LOCK();
sigexit(p, SIGILL);
+ /* NOTREACHED */
+ }
+
if (psize < sizeof(struct __kbind) || psize > sizeof(param))
return EINVAL;
if ((error = copyin(paramp, ¶m, psize)))
@@ -1169,8 +1172,11 @@ sys_kbind(struct proc *p, void *v, regis
vm_map_unlock(kernel_map);
kva = 0;
}
- if ((error = uvm_map_extract(&p->p_vmspace->vm_map,
- baseva, PAGE_SIZE, &kva, UVM_EXTRACT_FIXPROT)))
+ KERNEL_LOCK();
+ error = uvm_map_extract(&p->p_vmspace->vm_map,
+ baseva, PAGE_SIZE, &kva, UVM_EXTRACT_FIXPROT);
+ KERNEL_UNLOCK();
+ if (error)
break;
last_baseva = baseva;
}