Module Name:	src
Committed By:	riastradh
Date:		Sat Sep 11 10:09:31 UTC 2021

Modified Files:
	src/sys/kern: kern_ksyms.c

Log Message:
ksyms(4): Take a complete snapshot on each open.

- Snapshots are stored in pageable anonymous uvm objects.
- Snapshots are reference-counted so they can be reused across opens.
- Opening /dev/ksyms blocks module unload until snapshot is taken.
- Merely holding /dev/ksyms open does not block module unload.
- /dev/ksyms is now mmappable.

This slightly changes the behaviour of fstat(2) on /dev/ksyms -- it is
a little more useful now!  In particular, st_size is the size of the
symbol table.  Some other fields which were not very useful to begin
with -- st_dev, st_ino, st_mode, st_nlink, st_*time, st_blksize,
st_blocks -- are now different, and independent of the file system on
which the device node resides.

Discussed in

https://mail-index.netbsd.org/source-changes-d/2021/08/17/msg013425.html

This is option (3), adapted to make the ksyms snapshots pageable,
after options (1) and (2) posed practical problems.

To generate a diff of this commit:
cvs rdiff -u -r1.102 -r1.103 src/sys/kern/kern_ksyms.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/sys/kern/kern_ksyms.c diff -u src/sys/kern/kern_ksyms.c:1.102 src/sys/kern/kern_ksyms.c:1.103 --- src/sys/kern/kern_ksyms.c:1.102 Tue Sep 7 16:56:25 2021 +++ src/sys/kern/kern_ksyms.c Sat Sep 11 10:09:31 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_ksyms.c,v 1.102 2021/09/07 16:56:25 riastradh Exp $ */ +/* $NetBSD: kern_ksyms.c,v 1.103 2021/09/11 10:09:31 riastradh Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. @@ -73,7 +73,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c,v 1.102 2021/09/07 16:56:25 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c,v 1.103 2021/09/11 10:09:31 riastradh Exp $"); #if defined(_KERNEL) && defined(_KERNEL_OPT) #include "opt_copy_symtab.h" @@ -86,6 +86,9 @@ __KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c #include <sys/param.h> #include <sys/queue.h> #include <sys/exec.h> +#include <sys/file.h> +#include <sys/filedesc.h> +#include <sys/kauth.h> #include <sys/systm.h> #include <sys/conf.h> #include <sys/kmem.h> @@ -94,6 +97,9 @@ __KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c #include <sys/ksyms.h> #include <sys/kernel.h> #include <sys/intr.h> +#include <sys/stat.h> + +#include <uvm/uvm_extern.h> #ifdef DDB #include <ddb/db_output.h> @@ -104,6 +110,15 @@ __KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c #include "ioconf.h" #endif +struct ksyms_snapshot { + uint64_t ks_refcnt; + uint64_t ks_gen; + struct uvm_object *ks_uobj; + size_t ks_size; + dev_t ks_dev; + int ks_maxlen; +}; + #define KSYMS_MAX_ID 98304 #ifdef KDTRACE_HOOKS static uint32_t ksyms_nmap[KSYMS_MAX_ID]; /* sorted symbol table map */ @@ -112,15 +127,20 @@ static uint32_t *ksyms_nmap = NULL; #endif static int ksyms_maxlen; -static uint64_t ksyms_opencnt; -static struct ksyms_symtab *ksyms_last_snapshot; static bool ksyms_initted; static bool ksyms_loaded; static kmutex_t ksyms_lock __cacheline_aligned; static struct ksyms_symtab kernel_symtab; +static kcondvar_t ksyms_cv; +static struct lwp *ksyms_snapshotting; +static struct 
ksyms_snapshot *ksyms_snapshot; +static uint64_t ksyms_snapshot_gen; static void ksyms_hdr_init(const void *); static void ksyms_sizes_calc(void); +static struct ksyms_snapshot *ksyms_snapshot_alloc(int, size_t, dev_t, + uint64_t); +static void ksyms_snapshot_release(struct ksyms_snapshot *); #ifdef KSYMS_DEBUG #define FOLLOW_CALLS 1 @@ -245,6 +265,7 @@ ksyms_init(void) if (!ksyms_initted) { mutex_init(&ksyms_lock, MUTEX_DEFAULT, IPL_NONE); + cv_init(&ksyms_cv, "ksyms"); ksyms_initted = true; } } @@ -328,7 +349,6 @@ addsymtab(const char *name, void *symsta tab->sd_minsym = UINTPTR_MAX; tab->sd_maxsym = 0; tab->sd_usroffset = 0; - tab->sd_gone = false; tab->sd_ctfstart = ctfstart; tab->sd_ctfsize = ctfsize; tab->sd_nmap = nmap; @@ -446,9 +466,9 @@ addsymtab(const char *name, void *symsta KASSERT(cold || mutex_owned(&ksyms_lock)); /* - * Ensure ddb never witnesses an inconsistent state of the - * queue, unless memory is so corrupt that we crash in - * TAILQ_INSERT_TAIL. + * Publish the symtab. Do this at splhigh to ensure ddb never + * witnesses an inconsistent state of the queue, unless memory + * is so corrupt that we crash in TAILQ_INSERT_TAIL. */ s = splhigh(); TAILQ_INSERT_TAIL(&ksyms_symtabs, tab, sd_queue); @@ -557,6 +577,9 @@ ksyms_addsyms_elf(int symsize, void *sta kernel_symtab.sd_symstart, kernel_symtab.sd_strstart, (long)kernel_symtab.sd_symsize/sizeof(Elf_Sym)); #endif + + /* Should be no snapshot to invalidate yet. */ + KASSERT(ksyms_snapshot == NULL); } /* @@ -577,6 +600,9 @@ ksyms_addsyms_explicit(void *ehdr, void ksyms_hdr_init(ehdr); addsymtab("netbsd", symstart, symsize, strstart, strsize, &kernel_symtab, symstart, NULL, 0, ksyms_nmap); + + /* Should be no snapshot to invalidate yet. 
*/ + KASSERT(ksyms_snapshot == NULL); } /* @@ -601,8 +627,6 @@ ksyms_getval_unlocked(const char *mod, c #endif TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { - if (__predict_false(st->sd_gone)) - continue; if (mod != NULL && strcmp(st->sd_name, mod)) continue; if ((es = findsym(sym, st, type)) != NULL) { @@ -636,8 +660,6 @@ ksyms_get_mod(const char *mod) mutex_enter(&ksyms_lock); TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { - if (__predict_false(st->sd_gone)) - continue; if (mod != NULL && strcmp(st->sd_name, mod)) continue; break; @@ -671,8 +693,6 @@ ksyms_mod_foreach(const char *mod, ksyms /* find the module */ TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { - if (__predict_false(st->sd_gone)) - continue; if (mod != NULL && strcmp(st->sd_name, mod)) continue; @@ -716,8 +736,6 @@ ksyms_getname(const char **mod, const ch return ENOENT; TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { - if (st->sd_gone) - continue; if (v < st->sd_minsym || v > st->sd_maxsym) continue; sz = st->sd_symsize/sizeof(Elf_Sym); @@ -762,6 +780,7 @@ ksyms_modload(const char *name, void *sy char *strstart, vsize_t strsize) { struct ksyms_symtab *st; + struct ksyms_snapshot *ks; void *nmap; st = kmem_zalloc(sizeof(*st), KM_SLEEP); @@ -770,7 +789,12 @@ ksyms_modload(const char *name, void *sy mutex_enter(&ksyms_lock); addsymtab(name, symstart, symsize, strstart, strsize, st, symstart, NULL, 0, nmap); + ks = ksyms_snapshot; + ksyms_snapshot = NULL; mutex_exit(&ksyms_lock); + + if (ks) + ksyms_snapshot_release(ks); } /* @@ -780,37 +804,48 @@ void ksyms_modunload(const char *name) { struct ksyms_symtab *st; - bool do_free = false; + struct ksyms_snapshot *ks; int s; mutex_enter(&ksyms_lock); TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { - if (st->sd_gone) - continue; if (strcmp(name, st->sd_name) != 0) continue; - st->sd_gone = true; - ksyms_sizes_calc(); - if (ksyms_opencnt == 0) { - /* - * Ensure ddb never witnesses an inconsistent - * state of the queue, unless memory is so - * corrupt that we crash 
in TAILQ_REMOVE. - */ - s = splhigh(); - TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue); - splx(s); - do_free = true; - } break; } - mutex_exit(&ksyms_lock); KASSERT(st != NULL); - if (do_free) { - kmem_free(st->sd_nmap, st->sd_nmapsize * sizeof(uint32_t)); - kmem_free(st, sizeof(*st)); - } + /* Wait for any snapshot in progress to complete. */ + while (ksyms_snapshotting) + cv_wait(&ksyms_cv, &ksyms_lock); + + /* + * Remove the symtab. Do this at splhigh to ensure ddb never + * witnesses an inconsistent state of the queue, unless memory + * is so corrupt that we crash in TAILQ_REMOVE. + */ + s = splhigh(); + TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue); + splx(s); + + /* Recompute the ksyms sizes now that we've removed st. */ + ksyms_sizes_calc(); + + /* Invalidate the global ksyms snapshot. */ + ks = ksyms_snapshot; + ksyms_snapshot = NULL; + mutex_exit(&ksyms_lock); + + /* + * No more references are possible. Free the name map and the + * symtab itself, which we had allocated in ksyms_modload. + */ + kmem_free(st->sd_nmap, st->sd_nmapsize * sizeof(uint32_t)); + kmem_free(st, sizeof(*st)); + + /* Release the formerly global ksyms snapshot, if any. 
*/ + if (ks) + ksyms_snapshot_release(ks); } #ifdef DDB @@ -830,8 +865,6 @@ ksyms_sift(char *mod, char *sym, int mod return ENOENT; TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { - if (st->sd_gone) - continue; if (mod && strcmp(mod, st->sd_name)) continue; sb = st->sd_strstart - st->sd_usroffset; @@ -893,8 +926,6 @@ ksyms_sizes_calc(void) ksyms_symsz = ksyms_strsz = 0; TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { - if (__predict_false(st->sd_gone)) - continue; delta = ksyms_strsz - st->sd_usroffset; if (delta != 0) { for (i = 0; i < st->sd_symsize/sizeof(Elf_Sym); i++) @@ -997,164 +1028,401 @@ ksyms_hdr_init(const void *hdraddr) SHTCOPY(".SUNW_ctf"); } -static int -ksymsopen(dev_t dev, int oflags, int devtype, struct lwp *l) +static struct ksyms_snapshot * +ksyms_snapshot_alloc(int maxlen, size_t size, dev_t dev, uint64_t gen) { - if (minor(dev) != 0 || !ksyms_loaded) - return ENXIO; + struct ksyms_snapshot *ks; - /* - * Create a "snapshot" of the kernel symbol table. Bumping - * ksyms_opencnt will prevent symbol tables from being freed. 
- */ - mutex_enter(&ksyms_lock); - if (ksyms_opencnt++) - goto out; - ksyms_hdr.kh_shdr[SYMTAB].sh_size = ksyms_symsz; - ksyms_hdr.kh_shdr[SYMTAB].sh_info = ksyms_symsz / sizeof(Elf_Sym); - ksyms_hdr.kh_shdr[STRTAB].sh_offset = ksyms_symsz + - ksyms_hdr.kh_shdr[SYMTAB].sh_offset; - ksyms_hdr.kh_shdr[STRTAB].sh_size = ksyms_strsz; - ksyms_hdr.kh_shdr[SHCTF].sh_offset = ksyms_strsz + - ksyms_hdr.kh_shdr[STRTAB].sh_offset; - ksyms_hdr.kh_shdr[SHCTF].sh_size = ksyms_ctfsz; - ksyms_last_snapshot = TAILQ_LAST(&ksyms_symtabs, ksyms_symtab_queue); -out: mutex_exit(&ksyms_lock); + ks = kmem_zalloc(sizeof(*ks), KM_SLEEP); + ks->ks_refcnt = 1; + ks->ks_gen = gen; + ks->ks_uobj = uao_create(size, 0); + ks->ks_size = size; + ks->ks_dev = dev; + ks->ks_maxlen = maxlen; - return 0; + return ks; } -static int -ksymsclose(dev_t dev, int oflags, int devtype, struct lwp *l) +static void +ksyms_snapshot_release(struct ksyms_snapshot *ks) { - struct ksyms_symtab *st, *next; - TAILQ_HEAD(, ksyms_symtab) to_free = TAILQ_HEAD_INITIALIZER(to_free); - int s; + uint64_t refcnt; - /* Discard references to symbol tables. */ mutex_enter(&ksyms_lock); - if (--ksyms_opencnt) - goto out; - ksyms_last_snapshot = NULL; - TAILQ_FOREACH_SAFE(st, &ksyms_symtabs, sd_queue, next) { - if (st->sd_gone) { - /* - * Ensure ddb never witnesses an inconsistent - * state of the queue, unless memory is so - * corrupt that we crash in TAILQ_REMOVE. 
- */ - s = splhigh(); - TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue); - splx(s); - TAILQ_INSERT_TAIL(&to_free, st, sd_queue); - } - } - if (!TAILQ_EMPTY(&to_free)) - ksyms_sizes_calc(); -out: mutex_exit(&ksyms_lock); + refcnt = --ks->ks_refcnt; + mutex_exit(&ksyms_lock); - TAILQ_FOREACH_SAFE(st, &to_free, sd_queue, next) { - kmem_free(st->sd_nmap, st->sd_nmapsize * sizeof(uint32_t)); - kmem_free(st, sizeof(*st)); - } + if (refcnt) + return; - return 0; + uao_detach(ks->ks_uobj); + kmem_free(ks, sizeof(*ks)); } static int -ksymsread(dev_t dev, struct uio *uio, int ioflag) +ubc_copyfrombuf(struct uvm_object *uobj, struct uio *uio, const void *buf, + size_t n) { + struct iovec iov = { .iov_base = __UNCONST(buf), .iov_len = n }; + + uio->uio_iov = &iov; + uio->uio_iovcnt = 1; + uio->uio_resid = n; + + return ubc_uiomove(uobj, uio, n, UVM_ADV_SEQUENTIAL, UBC_WRITE); +} + +static int +ksyms_take_snapshot(struct ksyms_snapshot *ks, struct ksyms_symtab *last) +{ + struct uvm_object *uobj = ks->ks_uobj; + struct uio uio; struct ksyms_symtab *st; - size_t filepos, inpos, off; int error; + /* Caller must have initiated snapshotting. */ + KASSERT(ksyms_snapshotting == curlwp); + + /* Start a uio transfer to reuse incrementally. */ + uio.uio_offset = 0; + uio.uio_rw = UIO_WRITE; /* write from buffer to uobj */ + UIO_SETUP_SYSSPACE(&uio); + /* - * First: Copy out the ELF header. XXX Lose if ksymsopen() - * occurs during read of the header. + * First: Copy out the ELF header. */ - off = uio->uio_offset; - if (off < sizeof(struct ksyms_hdr)) { - error = uiomove((char *)&ksyms_hdr + off, - sizeof(struct ksyms_hdr) - off, uio); - if (error != 0) - return error; - } + error = ubc_copyfrombuf(uobj, &uio, &ksyms_hdr, sizeof(ksyms_hdr)); + if (error) + return error; /* - * Copy out the symbol table. + * Copy out the symbol table. The list of symtabs is + * guaranteed to be nonempty because we always have an entry + * for the main kernel. 
We stop at last, not at the end of the + * tailq or NULL, because entries beyond last are not included + * in this snapshot (and may not be fully initialized memory as + * we witness it). */ - filepos = sizeof(struct ksyms_hdr); - TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { - if (__predict_false(st->sd_gone)) - continue; - if (uio->uio_resid == 0) - return 0; - if (uio->uio_offset <= st->sd_symsize + filepos) { - inpos = uio->uio_offset - filepos; - error = uiomove((char *)st->sd_symstart + inpos, - st->sd_symsize - inpos, uio); - if (error != 0) - return error; - } - filepos += st->sd_symsize; - if (st == ksyms_last_snapshot) + KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr)); + for (st = TAILQ_FIRST(&ksyms_symtabs); + ; + st = TAILQ_NEXT(st, sd_queue)) { + error = ubc_copyfrombuf(uobj, &uio, st->sd_symstart, + st->sd_symsize); + if (error) + return error; + if (st == last) break; } /* * Copy out the string table */ - KASSERT(filepos == sizeof(struct ksyms_hdr) + + KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) + ksyms_hdr.kh_shdr[SYMTAB].sh_size); for (st = TAILQ_FIRST(&ksyms_symtabs); ; st = TAILQ_NEXT(st, sd_queue)) { - if (uio->uio_resid == 0) - return 0; - if (uio->uio_offset <= st->sd_strsize + filepos) { - inpos = uio->uio_offset - filepos; - error = uiomove((char *)st->sd_strstart + inpos, - st->sd_strsize - inpos, uio); - if (error != 0) - return error; - } - filepos += st->sd_strsize; - if (st == ksyms_last_snapshot) + error = ubc_copyfrombuf(uobj, &uio, st->sd_strstart, + st->sd_strsize); + if (error) + return error; + if (st == last) break; } /* * Copy out the CTF table. 
*/ + KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) + + ksyms_hdr.kh_shdr[SYMTAB].sh_size + + ksyms_hdr.kh_shdr[STRTAB].sh_size); st = TAILQ_FIRST(&ksyms_symtabs); if (st->sd_ctfstart != NULL) { - if (uio->uio_resid == 0) - return 0; - if (uio->uio_offset <= st->sd_ctfsize + filepos) { - inpos = uio->uio_offset - filepos; - error = uiomove((char *)st->sd_ctfstart + inpos, - st->sd_ctfsize - inpos, uio); - if (error != 0) - return error; - } - filepos += st->sd_ctfsize; + error = ubc_copyfrombuf(uobj, &uio, st->sd_ctfstart, + st->sd_ctfsize); + if (error) + return error; } + KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) + + ksyms_hdr.kh_shdr[SYMTAB].sh_size + + ksyms_hdr.kh_shdr[STRTAB].sh_size + + ksyms_hdr.kh_shdr[SHCTF].sh_size); + KASSERT(uio.uio_offset == ks->ks_size); + return 0; } +static const struct fileops ksyms_fileops; + static int -ksymswrite(dev_t dev, struct uio *uio, int ioflag) +ksymsopen(dev_t dev, int flags, int devtype, struct lwp *l) { - return EROFS; + struct file *fp = NULL; + int fd = -1; + struct ksyms_snapshot *ks = NULL; + size_t size; + struct ksyms_symtab *last; + int maxlen; + uint64_t gen; + int error; + + if (minor(dev) != 0 || !ksyms_loaded) + return ENXIO; + + /* Allocate a private file. */ + error = fd_allocfile(&fp, &fd); + if (error) + return error; + + mutex_enter(&ksyms_lock); + + /* + * Wait until we have a snapshot, or until there is no snapshot + * being taken right now so we can take one. + */ + while ((ks = ksyms_snapshot) == NULL && ksyms_snapshotting) { + error = cv_wait_sig(&ksyms_cv, &ksyms_lock); + if (error) + goto out; + } + + /* + * If there's a usable snapshot, increment its reference count + * (can't overflow, 64-bit) and just reuse it. + */ + if (ks) { + ks->ks_refcnt++; + goto out; + } + + /* Find the current length of the symtab object. */ + size = sizeof(struct ksyms_hdr); + size += ksyms_strsz; + size += ksyms_symsz; + size += ksyms_ctfsz; + + /* Start a new snapshot. 
*/ + ksyms_hdr.kh_shdr[SYMTAB].sh_size = ksyms_symsz; + ksyms_hdr.kh_shdr[SYMTAB].sh_info = ksyms_symsz / sizeof(Elf_Sym); + ksyms_hdr.kh_shdr[STRTAB].sh_offset = ksyms_symsz + + ksyms_hdr.kh_shdr[SYMTAB].sh_offset; + ksyms_hdr.kh_shdr[STRTAB].sh_size = ksyms_strsz; + ksyms_hdr.kh_shdr[SHCTF].sh_offset = ksyms_strsz + + ksyms_hdr.kh_shdr[STRTAB].sh_offset; + ksyms_hdr.kh_shdr[SHCTF].sh_size = ksyms_ctfsz; + last = TAILQ_LAST(&ksyms_symtabs, ksyms_symtab_queue); + maxlen = ksyms_maxlen; + gen = ksyms_snapshot_gen++; + + /* + * Prevent ksyms entries from being removed while we take the + * snapshot. + */ + KASSERT(ksyms_snapshotting == NULL); + ksyms_snapshotting = curlwp; + mutex_exit(&ksyms_lock); + + /* Create a snapshot and write the symtab to it. */ + ks = ksyms_snapshot_alloc(maxlen, size, dev, gen); + error = ksyms_take_snapshot(ks, last); + + /* + * Snapshot creation is done. Wake up anyone waiting to remove + * entries (module unload). + */ + mutex_enter(&ksyms_lock); + KASSERTMSG(ksyms_snapshotting == curlwp, "lwp %p stole snapshot", + ksyms_snapshotting); + ksyms_snapshotting = NULL; + cv_broadcast(&ksyms_cv); + + /* If we failed, give up. */ + if (error) + goto out; + + /* Cache the snapshot for the next reader. 
*/ + KASSERT(ksyms_snapshot == NULL); + ksyms_snapshot = ks; + ks->ks_refcnt++; + KASSERT(ks->ks_refcnt == 2); + +out: mutex_exit(&ksyms_lock); + if (error) { + if (fp) + fd_abort(curproc, fp, fd); + if (ks) + ksyms_snapshot_release(ks); + } else { + KASSERT(fp); + KASSERT(ks); + error = fd_clone(fp, fd, flags, &ksyms_fileops, ks); + KASSERTMSG(error == EMOVEFD, "error=%d", error); + } + return error; +} + +static int +ksymsclose(struct file *fp) +{ + struct ksyms_snapshot *ks = fp->f_data; + + ksyms_snapshot_release(ks); + + return 0; +} + +static int +ksymsread(struct file *fp, off_t *offp, struct uio *uio, kauth_cred_t cred, + int flags) +{ + const struct ksyms_snapshot *ks = fp->f_data; + size_t count; + int error; + + /* + * Since we don't have a per-object lock, we might as well use + * the struct file lock to serialize access to fp->f_offset -- + * but if the caller isn't relying on or updating fp->f_offset, + * there's no need to do even that. We could use ksyms_lock, + * but why bother with a global lock if not needed? Either + * way, the lock we use here must agree with what ksymsseek + * takes (nothing else in ksyms uses fp->f_offset). + */ + if (offp == &fp->f_offset) + mutex_enter(&fp->f_lock); + + /* Refuse negative offsets. */ + if (*offp < 0) { + error = EINVAL; + goto out; + } + + /* Return nothing at or past end of file. */ + if (*offp >= ks->ks_size) { + error = 0; + goto out; + } + + /* + * 1. Set up the uio to transfer from offset *offp. + * 2. Transfer as many bytes as we can (at most uio->uio_resid + * or what's left in the ksyms). + * 3. If requested, update *offp to reflect the number of bytes + * transferred. 
+ */ + uio->uio_offset = *offp; + count = uio->uio_resid; + error = ubc_uiomove(ks->ks_uobj, uio, MIN(count, ks->ks_size - *offp), + UVM_ADV_SEQUENTIAL, UBC_READ|UBC_PARTIALOK); + if (flags & FOF_UPDATE_OFFSET) + *offp += count - uio->uio_resid; + +out: if (offp == &fp->f_offset) + mutex_exit(&fp->f_lock); + return error; +} + +static int +ksymsstat(struct file *fp, struct stat *st) +{ + const struct ksyms_snapshot *ks = fp->f_data; + + memset(st, 0, sizeof(*st)); + + st->st_dev = NODEV; + st->st_ino = 0; + st->st_mode = S_IFCHR; + st->st_nlink = 1; + st->st_uid = kauth_cred_geteuid(fp->f_cred); + st->st_gid = kauth_cred_getegid(fp->f_cred); + st->st_rdev = ks->ks_dev; + st->st_size = ks->ks_size; + /* zero time */ + st->st_blksize = MAXPHYS; /* XXX arbitrary */ + st->st_blocks = 0; + st->st_gen = ks->ks_gen; + + return 0; +} + +static int +ksymsmmap(struct file *fp, off_t *offp, size_t nbytes, int prot, int *flagsp, + int *advicep, struct uvm_object **uobjp, int *maxprotp) +{ + const struct ksyms_snapshot *ks = fp->f_data; + + /* uvm_mmap guarantees page-aligned offset and size. */ + KASSERT(*offp == round_page(*offp)); + KASSERT(nbytes == round_page(nbytes)); + + /* Refuse negative offsets. */ + if (*offp < 0) + return EINVAL; + + /* Refuse mappings that pass the end of file. */ + if (nbytes > round_page(ks->ks_size) || + *offp > round_page(ks->ks_size) - nbytes) + return EINVAL; /* XXX ??? */ + + /* Success! */ + *advicep = UVM_ADV_SEQUENTIAL; + *uobjp = ks->ks_uobj; + *maxprotp = prot & VM_PROT_READ; + return 0; +} + +static int +ksymsseek(struct file *fp, off_t delta, int whence, off_t *newoffp, int flags) +{ + struct ksyms_snapshot *ks = fp->f_data; + off_t base, newoff; + int error; + + mutex_enter(&fp->f_lock); + + switch (whence) { + case SEEK_CUR: + base = fp->f_offset; + break; + case SEEK_END: + base = ks->ks_size; + break; + case SEEK_SET: + base = 0; + break; + default: + error = EINVAL; + goto out; + } + + /* Compute the new offset and validate it. 
*/ + newoff = base + delta; /* XXX arithmetic overflow */ + if (newoff < 0) { + error = EINVAL; + goto out; + } + + /* Success! */ + if (newoffp) + *newoffp = newoff; + if (flags & FOF_UPDATE_OFFSET) + fp->f_offset = newoff; + error = 0; + +out: mutex_exit(&fp->f_lock); + return error; } __CTASSERT(offsetof(struct ksyms_ogsymbol, kg_name) == offsetof(struct ksyms_gsymbol, kg_name)); __CTASSERT(offsetof(struct ksyms_gvalue, kv_name) == offsetof(struct ksyms_gsymbol, kg_name)); static int -ksymsioctl(dev_t dev, u_long cmd, void *data, int fflag, struct lwp *l) +ksymsioctl(struct file *fp, u_long cmd, void *data) { + struct ksyms_snapshot *ks = fp->f_data; struct ksyms_ogsymbol *okg = (struct ksyms_ogsymbol *)data; struct ksyms_gsymbol *kg = (struct ksyms_gsymbol *)data; struct ksyms_gvalue *kv = (struct ksyms_gvalue *)data; @@ -1165,8 +1433,8 @@ ksymsioctl(dev_t dev, u_long cmd, void * char *str = NULL; int len; - /* Read ksyms_maxlen only once while not holding the lock. */ - len = ksyms_maxlen; + /* Read cached ksyms_maxlen. */ + len = ks->ks_maxlen; if (cmd == OKIOCGVALUE || cmd == OKIOCGSYMBOL || cmd == KIOCGVALUE || cmd == KIOCGSYMBOL) { @@ -1196,8 +1464,6 @@ ksymsioctl(dev_t dev, u_long cmd, void * */ mutex_enter(&ksyms_lock); TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { - if (st->sd_gone) - continue; if ((sym = findsym(str, st, KSYMS_ANY)) == NULL) continue; #ifdef notdef @@ -1238,8 +1504,6 @@ ksymsioctl(dev_t dev, u_long cmd, void * */ mutex_enter(&ksyms_lock); TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) { - if (st->sd_gone) - continue; if ((sym = findsym(str, st, KSYMS_ANY)) == NULL) continue; #ifdef notdef @@ -1264,10 +1528,7 @@ ksymsioctl(dev_t dev, u_long cmd, void * /* * Get total size of symbol table. 
*/ - mutex_enter(&ksyms_lock); - *(int *)data = ksyms_strsz + ksyms_symsz + - sizeof(struct ksyms_hdr); - mutex_exit(&ksyms_lock); + *(int *)data = ks->ks_size; break; default: @@ -1280,15 +1541,30 @@ ksymsioctl(dev_t dev, u_long cmd, void * const struct cdevsw ksyms_cdevsw = { .d_open = ksymsopen, - .d_close = ksymsclose, - .d_read = ksymsread, - .d_write = ksymswrite, - .d_ioctl = ksymsioctl, - .d_stop = nullstop, + .d_close = noclose, + .d_read = noread, + .d_write = nowrite, + .d_ioctl = noioctl, + .d_stop = nostop, .d_tty = notty, .d_poll = nopoll, .d_mmap = nommap, - .d_kqfilter = nullkqfilter, + .d_kqfilter = nokqfilter, .d_discard = nodiscard, .d_flag = D_OTHER | D_MPSAFE }; + +static const struct fileops ksyms_fileops = { + .fo_name = "ksyms", + .fo_read = ksymsread, + .fo_write = fbadop_write, + .fo_ioctl = ksymsioctl, + .fo_fcntl = fnullop_fcntl, + .fo_poll = fnullop_poll, + .fo_stat = ksymsstat, + .fo_close = ksymsclose, + .fo_kqfilter = fnullop_kqfilter, + .fo_restart = fnullop_restart, + .fo_mmap = ksymsmmap, + .fo_seek = ksymsseek, +};