Module Name:	src
Committed By:	martin
Date:		Wed Jun 21 22:34:51 UTC 2023
Modified Files:
	src/sys/dev/tprof [netbsd-10]: tprof.c tprof_armv7.c tprof_armv8.c
	    tprof_types.h tprof_x86_amd.c tprof_x86_intel.c
	src/usr.sbin/tprof [netbsd-10]: tprof.8 tprof.c tprof.h tprof_top.c
	src/usr.sbin/tprof/arch [netbsd-10]: tprof_x86.c

Log Message:
Pull up following revision(s) (requested by msaitoh in ticket #210):

	usr.sbin/tprof/tprof.8: revision 1.30
	sys/dev/tprof/tprof_x86_amd.c: revision 1.8
	sys/dev/tprof/tprof_armv8.c: revision 1.20
	sys/dev/tprof/tprof_types.h: revision 1.7
	sys/dev/tprof/tprof_x86_intel.c: revision 1.6
	sys/dev/tprof/tprof_x86_intel.c: revision 1.7
	sys/dev/tprof/tprof_x86_intel.c: revision 1.8
	sys/dev/tprof/tprof.c: revision 1.23
	usr.sbin/tprof/tprof.8: revision 1.25
	usr.sbin/tprof/tprof.8: revision 1.26
	usr.sbin/tprof/arch/tprof_x86.c: revision 1.16
	usr.sbin/tprof/tprof.8: revision 1.27
	usr.sbin/tprof/arch/tprof_x86.c: revision 1.17
	usr.sbin/tprof/tprof.8: revision 1.28
	usr.sbin/tprof/tprof.h: revision 1.5
	usr.sbin/tprof/tprof.8: revision 1.29
	sys/dev/tprof/tprof_armv7.c: revision 1.13
	usr.sbin/tprof/tprof_top.c: revision 1.9
	usr.sbin/tprof/tprof.c: revision 1.21

Add Cometlake support.

Obtain the number of general counters from CPUID 0xa.

Test cpuid_level in tprof_intel_ncounters(); this function is called
before tprof_intel_ident().

KNF. No functional change.

Add two notes to the tprof(8) manual page:
 - The "list" command prints the maximum number of counters that can be
   used simultaneously.
 - Multiple -e arguments can be specified.

Use the default counter if no -e argument is specified:
 - monitor command: the default counter is selected if no -e argument
   is specified.
 - list command: print the name of the default counter for the monitor
   and top commands.

tprof.8: new sentence, new line.

tprof(8): fix markup nits.

tprof.8: fix typo, s/speficied/specified/


To generate a diff of this commit:
cvs rdiff -u -r1.21.2.1 -r1.21.2.2 src/sys/dev/tprof/tprof.c
cvs rdiff -u -r1.11.2.1 -r1.11.2.2 src/sys/dev/tprof/tprof_armv7.c
cvs rdiff -u -r1.18.2.1 -r1.18.2.2 src/sys/dev/tprof/tprof_armv8.c
cvs rdiff -u -r1.6 -r1.6.2.1 src/sys/dev/tprof/tprof_types.h
cvs rdiff -u -r1.7 -r1.7.2.1 src/sys/dev/tprof/tprof_x86_amd.c
cvs rdiff -u -r1.5 -r1.5.2.1 src/sys/dev/tprof/tprof_x86_intel.c
cvs rdiff -u -r1.24 -r1.24.2.1 src/usr.sbin/tprof/tprof.8
cvs rdiff -u -r1.18.2.1 -r1.18.2.2 src/usr.sbin/tprof/tprof.c
cvs rdiff -u -r1.4 -r1.4.2.1 src/usr.sbin/tprof/tprof.h
cvs rdiff -u -r1.7.2.1 -r1.7.2.2 src/usr.sbin/tprof/tprof_top.c
cvs rdiff -u -r1.15 -r1.15.2.1 src/usr.sbin/tprof/arch/tprof_x86.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
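Background for the "CPUID 0xa" change above: on Intel CPUs, architectural
performance monitoring is described by CPUID leaf 0x0a, where EAX[7:0] is
the PMU version, EAX[15:8] the number of general-purpose counters per
logical CPU, and EAX[23:16] their bit width. The sketch below is an
editor's illustration only, not part of the commit; it uses the GCC/Clang
<cpuid.h> helpers from userland rather than the kernel's x86_cpuid():

	/*
	 * Print the general-purpose counter configuration from CPUID
	 * leaf 0x0a, mirroring what tprof_intel_ncounters() and
	 * tprof_intel_ident() read in the kernel.
	 */
	#include <stdio.h>
	#include <cpuid.h>

	int
	main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* Leaf 0x0a must exist, cf. the new cpuid_level test. */
		if (__get_cpuid_max(0, NULL) < 0x0a) {
			printf("no architectural PMU\n");
			return 0;
		}
		__cpuid(0x0a, eax, ebx, ecx, edx);
		printf("PMU version:       %u\n", eax & 0xff);
		printf("GP counters:       %u\n", (eax >> 8) & 0xff);
		printf("counter bit width: %u\n", (eax >> 16) & 0xff);
		return 0;
	}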
Modified files:

Index: src/sys/dev/tprof/tprof.c
diff -u src/sys/dev/tprof/tprof.c:1.21.2.1 src/sys/dev/tprof/tprof.c:1.21.2.2
--- src/sys/dev/tprof/tprof.c:1.21.2.1	Fri Dec 23 08:09:48 2022
+++ src/sys/dev/tprof/tprof.c	Wed Jun 21 22:34:51 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof.c,v 1.21.2.1 2022/12/23 08:09:48 martin Exp $ */
+/* $NetBSD: tprof.c,v 1.21.2.2 2023/06/21 22:34:51 martin Exp $ */
 
 /*-
  * Copyright (c)2008,2009,2010 YAMAMOTO Takashi,
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.21.2.1 2022/12/23 08:09:48 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.21.2.2 2023/06/21 22:34:51 martin Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -213,7 +213,7 @@ tprof_worker(struct work *wk, void *dumm
 	KASSERT(dummy == NULL);
 
 	/*
-	 * get a per cpu buffer.
+	 * Get a per cpu buffer.
 	 */
 	buf = tprof_buf_refresh();
 
@@ -245,12 +245,11 @@ tprof_worker(struct work *wk, void *dumm
 		tprof_stat.ts_dropbuf++;
 	}
 	mutex_exit(&tprof_lock);
-	if (buf) {
+	if (buf)
 		tprof_buf_free(buf);
-	}
-	if (!shouldstop) {
+
+	if (!shouldstop)
 		callout_schedule(&c->c_callout, hz / 8);
-	}
 }
 
 static void
@@ -276,9 +275,9 @@ tprof_stop1(void)
 		tprof_buf_t *old;
 
 		old = tprof_buf_switch(c, NULL);
-		if (old != NULL) {
+		if (old != NULL)
 			tprof_buf_free(old);
-		}
+
 		callout_destroy(&c->c_callout);
 	}
 	workqueue_destroy(tprof_wq);
@@ -293,9 +292,8 @@ tprof_getinfo(struct tprof_info *info)
 
 	memset(info, 0, sizeof(*info));
 	info->ti_version = TPROF_VERSION;
-	if ((tb = tprof_backend) != NULL) {
+	if ((tb = tprof_backend) != NULL)
 		info->ti_ident = tb->tb_ops->tbo_ident();
-	}
 }
 
 static int
@@ -351,8 +349,8 @@ tprof_start(tprof_countermask_t runmask)
 	runmask &= tb->tb_softc.sc_ctr_configured_mask;
 	if (runmask == 0) {
 		/*
-		 * targets are already running.
-		 * unconfigured counters are ignored.
+		 * Targets are already running.
+		 * Unconfigured counters are ignored.
 		 */
 		error = 0;
 		goto done;
@@ -427,7 +425,7 @@ tprof_stop(tprof_countermask_t stopmask)
 	KASSERT(mutex_owned(&tprof_startstop_lock));
 	stopmask &= tb->tb_softc.sc_ctr_running_mask;
 	if (stopmask == 0) {
-		/* targets are not running */
+		/* Targets are not running */
 		goto done;
 	}
 
@@ -437,13 +435,13 @@ tprof_stop(tprof_countermask_t stopmask)
 	tb->tb_softc.sc_ctr_running_mask &= ~stopmask;
 	mutex_exit(&tprof_lock);
 
-	/* all counters have stopped? */
+	/* All counters have stopped? */
 	if (tb->tb_softc.sc_ctr_running_mask == 0) {
 		mutex_enter(&tprof_lock);
 		cv_broadcast(&tprof_reader_cv);
-		while (tprof_nworker > 0) {
+		while (tprof_nworker > 0)
 			cv_wait(&tprof_cv, &tprof_lock);
-		}
+
 		mutex_exit(&tprof_lock);
 
 		tprof_stop1();
@@ -516,7 +514,7 @@ tprof_configure_event(const tprof_param_
 		    tb->tb_ops->tbo_counter_bitwidth(param->p_counter);
 
 	sc_param = &sc->sc_count[c].ctr_param;
-	memcpy(sc_param, param, sizeof(*sc_param)); /* save copy of param */
+	memcpy(sc_param, param, sizeof(*sc_param));	/* save copy of param */
 
 	if (ISSET(param->p_flags, TPROF_PARAM_PROFILE)) {
 		uint64_t freq, inum, dnum;
@@ -618,9 +616,8 @@ tprof_getcounts_cpu(void *arg1, void *ar
 			counters[c] = counters_offset[c] +
 			    ((ctr - sc->sc_count[c].ctr_counter_reset_val) &
 			    __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0));
-		} else {
+		} else
 			counters[c] = 0;
-		}
 	}
 	percpu_putref(sc->sc_ctr_offset_percpu);
 }
@@ -741,9 +738,8 @@ tprof_backend_register(const char *name,
 {
 	tprof_backend_t *tb;
 
-	if (vers != TPROF_BACKEND_VERSION) {
+	if (vers != TPROF_BACKEND_VERSION)
 		return EINVAL;
-	}
 
 	mutex_enter(&tprof_startstop_lock);
 	tb = tprof_backend_lookup(name);
@@ -768,7 +764,7 @@ tprof_backend_register(const char *name,
 #endif
 	mutex_exit(&tprof_startstop_lock);
 
-	/* init backend softc */
+	/* Init backend softc */
 	tb->tb_softc.sc_ncounters = tb->tb_ops->tbo_ncounters();
 	tb->tb_softc.sc_ctr_offset_percpu_size =
 	    sizeof(uint64_t) * tb->tb_softc.sc_ncounters;
@@ -800,9 +796,8 @@ tprof_backend_unregister(const char *nam
 		return EBUSY;
 	}
 #if 1 /* XXX for now */
-	if (tprof_backend == tb) {
+	if (tprof_backend == tb)
 		tprof_backend = NULL;
-	}
 #endif
 	LIST_REMOVE(tb, tb_list);
 	mutex_exit(&tprof_startstop_lock);
@@ -811,7 +806,7 @@ tprof_backend_unregister(const char *nam
 	percpu_free(tb->tb_softc.sc_ctr_offset_percpu,
 	    tb->tb_softc.sc_ctr_offset_percpu_size);
 
-	/* free backend */
+	/* Free backend */
 	kmem_free(tb, sizeof(*tb));
 
 	return 0;
@@ -823,9 +818,9 @@ static int
 tprof_open(dev_t dev, int flags, int type, struct lwp *l)
 {
 
-	if (minor(dev) != 0) {
+	if (minor(dev) != 0)
 		return EXDEV;
-	}
+
 	mutex_enter(&tprof_lock);
 	if (tprof_owner != NULL) {
 		mutex_exit(&tprof_lock);
@@ -953,7 +948,7 @@ tprof_read(dev_t dev, struct uio *uio, i
 	mutex_enter(&tprof_reader_lock);
 	while (uio->uio_resid > 0 && error == 0) {
 		/*
-		 * take the first buffer from the list.
+		 * Take the first buffer from the list.
 		 */
 		mutex_enter(&tprof_lock);
 		buf = STAILQ_FIRST(&tprof_list);
@@ -975,7 +970,7 @@ tprof_read(dev_t dev, struct uio *uio, i
 		mutex_exit(&tprof_lock);
 
 		/*
-		 * copy it out.
+		 * Copy it out.
 		 */
 		bytes = MIN(buf->b_used * sizeof(tprof_sample_t) -
 		    tprof_reader_offset, uio->uio_resid);
@@ -986,7 +981,7 @@ tprof_read(dev_t dev, struct uio *uio, i
 		tprof_reader_offset += done;
 
 		/*
-		 * if we didn't consume the whole buffer,
+		 * If we didn't consume the whole buffer,
 		 * put it back to the list.
 		 */
 		if (tprof_reader_offset <
@@ -1080,7 +1075,7 @@ void
 tprofattach(int nunits)
 {
 
-	/* nothing */
+	/* Nothing */
 }
 
 MODULE(MODULE_CLASS_DRIVER, tprof, NULL);

Index: src/sys/dev/tprof/tprof_armv7.c
diff -u src/sys/dev/tprof/tprof_armv7.c:1.11.2.1 src/sys/dev/tprof/tprof_armv7.c:1.11.2.2
--- src/sys/dev/tprof/tprof_armv7.c:1.11.2.1	Fri Dec 23 08:09:48 2022
+++ src/sys/dev/tprof/tprof_armv7.c	Wed Jun 21 22:34:51 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof_armv7.c,v 1.11.2.1 2022/12/23 08:09:48 martin Exp $ */
+/* $NetBSD: tprof_armv7.c,v 1.11.2.2 2023/06/21 22:34:51 martin Exp $ */
 
 /*-
  * Copyright (c) 2018 Jared McNeill <jmcne...@invisible.ca>
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tprof_armv7.c,v 1.11.2.1 2022/12/23 08:09:48 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tprof_armv7.c,v 1.11.2.2 2023/06/21 22:34:51 martin Exp $");
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -117,7 +117,7 @@ armv7_pmu_get_pmevcntr(u_int counter)
 	return armreg_pmxevcntr_read();
 }
 
-/* read and write at once */
+/* Read and write at once */
 static inline uint64_t
 armv7_pmu_getset_pmevcntr(u_int counter, uint64_t val)
 {
@@ -190,7 +190,7 @@ armv7_pmu_configure_event(u_int counter,
 	/* Clear overflow flag */
 	armreg_pmovsr_write(__BIT(counter) & PMOVS_P);
 
-	/* reset the counter */
+	/* Reset the counter */
 	armv7_pmu_set_pmevcntr(counter, param->p_value);
 }
 
@@ -235,13 +235,13 @@ armv7_pmu_intr(void *priv)
 		CLR(mask, __BIT(bit));
 
 		if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) {
-			/* account for the counter, and reset */
+			/* Account for the counter, and reset */
 			uint64_t ctr = armv7_pmu_getset_pmevcntr(bit,
 			    sc->sc_count[bit].ctr_counter_reset_val);
 			counters_offset[bit] +=
 			    sc->sc_count[bit].ctr_counter_val + ctr;
 
-			/* record a sample */
+			/* Record a sample */
 			tfi.tfi_pc = tf->tf_pc;
 			tfi.tfi_counter = bit;
 			tfi.tfi_inkernel =
@@ -249,7 +249,7 @@ armv7_pmu_intr(void *priv)
 			    tfi.tfi_pc < VM_MAX_KERNEL_ADDRESS;
 			tprof_sample(NULL, &tfi);
 		} else if (ISSET(sc->sc_ctr_ovf_mask, __BIT(bit))) {
-			/* counter has overflowed */
+			/* Counter has overflowed */
 			counters_offset[bit] += __BIT(32);
 		}
 	}

Index: src/sys/dev/tprof/tprof_armv8.c
diff -u src/sys/dev/tprof/tprof_armv8.c:1.18.2.1 src/sys/dev/tprof/tprof_armv8.c:1.18.2.2
--- src/sys/dev/tprof/tprof_armv8.c:1.18.2.1	Fri Dec 23 08:09:48 2022
+++ src/sys/dev/tprof/tprof_armv8.c	Wed Jun 21 22:34:51 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof_armv8.c,v 1.18.2.1 2022/12/23 08:09:48 martin Exp $ */
+/* $NetBSD: tprof_armv8.c,v 1.18.2.2 2023/06/21 22:34:51 martin Exp $ */
 
 /*-
  * Copyright (c) 2018 Jared McNeill <jmcne...@invisible.ca>
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tprof_armv8.c,v 1.18.2.1 2022/12/23 08:09:48 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tprof_armv8.c,v 1.18.2.2 2023/06/21 22:34:51 martin Exp $");
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -94,7 +94,7 @@ armv8_pmu_get_pmevcntr(u_int counter)
 	return reg_pmxevcntr_el0_read();
 }
 
-/* read and write at once */
+/* Read and write at once */
 static inline uint64_t
 armv8_pmu_getset_pmevcntr(u_int counter, uint64_t val)
 {
@@ -162,7 +162,7 @@ armv8_pmu_configure_event(u_int counter,
 	/* Clear overflow flag */
 	reg_pmovsclr_el0_write(__BIT(counter) & PMOVS_P);
 
-	/* reset the counter */
+	/* Reset the counter */
 	armv8_pmu_set_pmevcntr(counter, param->p_value);
 }
 
@@ -207,13 +207,13 @@ armv8_pmu_intr(void *priv)
 		CLR(mask, __BIT(bit));
 
 		if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) {
-			/* account for the counter, and reset */
+			/* Account for the counter, and reset */
 			uint64_t ctr = armv8_pmu_getset_pmevcntr(bit,
 			    sc->sc_count[bit].ctr_counter_reset_val);
 			counters_offset[bit] +=
 			    sc->sc_count[bit].ctr_counter_val + ctr;
 
-			/* record a sample */
+			/* Record a sample */
 			tfi.tfi_pc = tf->tf_pc;
 			tfi.tfi_counter = bit;
 			tfi.tfi_inkernel =
@@ -221,7 +221,7 @@ armv8_pmu_intr(void *priv)
 			    tfi.tfi_pc < VM_MAX_KERNEL_ADDRESS;
 			tprof_sample(NULL, &tfi);
 		} else if (ISSET(sc->sc_ctr_ovf_mask, __BIT(bit))) {
-			/* counter has overflowed */
+			/* Counter has overflowed */
 			counters_offset[bit] += __BIT(32);
 		}
 	}

Index: src/sys/dev/tprof/tprof_types.h
diff -u src/sys/dev/tprof/tprof_types.h:1.6 src/sys/dev/tprof/tprof_types.h:1.6.2.1
--- src/sys/dev/tprof/tprof_types.h:1.6	Thu Dec  1 00:32:52 2022
+++ src/sys/dev/tprof/tprof_types.h	Wed Jun 21 22:34:51 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof_types.h,v 1.6 2022/12/01 00:32:52 ryo Exp $ */
+/* $NetBSD: tprof_types.h,v 1.6.2.1 2023/06/21 22:34:51 martin Exp $ */
 
 /*-
  * Copyright (c)2010,2011 YAMAMOTO Takashi,
@@ -76,8 +76,8 @@ typedef struct tprof_param {
 	 *	speed ratio. if the counter is N times slower than the cycle
	 *	counter, p_value2 is (0x1_0000_0000 / N). 0 is treated as 1.0.
 	 * TPROF_PARAM_VALUE2_TRIGGERCOUNT:
-	 *	When the event counter counts up p_value2, an interrupt for profile
-	 *	is generated. 0 is treated as 1.
+	 *	When the event counter counts up p_value2, an interrupt for
+	 *	profile is generated. 0 is treated as 1.
 	 */
 } tprof_param_t;

Index: src/sys/dev/tprof/tprof_x86_amd.c
diff -u src/sys/dev/tprof/tprof_x86_amd.c:1.7 src/sys/dev/tprof/tprof_x86_amd.c:1.7.2.1
--- src/sys/dev/tprof/tprof_x86_amd.c:1.7	Thu Dec  8 05:29:27 2022
+++ src/sys/dev/tprof/tprof_x86_amd.c	Wed Jun 21 22:34:51 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof_x86_amd.c,v 1.7 2022/12/08 05:29:27 msaitoh Exp $ */
+/* $NetBSD: tprof_x86_amd.c,v 1.7.2.1 2023/06/21 22:34:51 martin Exp $ */
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -56,7 +56,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tprof_x86_amd.c,v 1.7 2022/12/08 05:29:27 msaitoh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tprof_x86_amd.c,v 1.7.2.1 2023/06/21 22:34:51 martin Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -154,7 +154,7 @@ tprof_amd_configure_event(u_int counter,
 	    __SHIFTIN(param->p_unit, PESR_UNIT_MASK);
 	wrmsr(PERFEVTSEL(counter), pesr);
 
-	/* reset the counter */
+	/* Reset the counter */
 	tprof_amd_counter_write(counter, param->p_value);
 }
 
@@ -202,13 +202,13 @@ tprof_amd_nmi(const struct trapframe *tf
 			continue;	/* not overflowed */
 
 		if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) {
-			/* account for the counter, and reset */
+			/* Account for the counter, and reset */
 			tprof_amd_counter_write(bit,
 			    sc->sc_count[bit].ctr_counter_reset_val);
 			counters_offset[bit] +=
 			    sc->sc_count[bit].ctr_counter_val + ctr;
 
-			/* record a sample */
+			/* Record a sample */
 #if defined(__x86_64__)
 			tfi.tfi_pc = tf->tf_rip;
 #else
@@ -218,7 +218,7 @@ tprof_amd_nmi(const struct trapframe *tf
 			tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS;
 			tprof_sample(NULL, &tfi);
 		} else {
-			/* not profiled, but require to consider overflow */
+			/* Not profiled, but require to consider overflow */
 			counters_offset[bit] += __BIT(COUNTER_BITWIDTH);
 		}
 	}
@@ -237,9 +237,8 @@ tprof_amd_ident(void)
 {
 	struct cpu_info *ci = curcpu();
 
-	if (cpu_vendor != CPUVENDOR_AMD) {
+	if (cpu_vendor != CPUVENDOR_AMD)
 		return TPROF_IDENT_NONE;
-	}
 
 	switch (CPUID_TO_FAMILY(ci->ci_signature)) {
 	case 0x10:
@@ -274,9 +273,8 @@ tprof_amd_establish(tprof_backend_softc_
 {
 	uint64_t xc;
 
-	if (tprof_amd_ident() == TPROF_IDENT_NONE) {
+	if (tprof_amd_ident() == TPROF_IDENT_NONE)
 		return ENOTSUP;
-	}
 
 	KASSERT(amd_nmi_handle == NULL);
 	amd_nmi_handle = nmi_establish(tprof_amd_nmi, sc);

Index: src/sys/dev/tprof/tprof_x86_intel.c
diff -u src/sys/dev/tprof/tprof_x86_intel.c:1.5 src/sys/dev/tprof/tprof_x86_intel.c:1.5.2.1
--- src/sys/dev/tprof/tprof_x86_intel.c:1.5	Thu Dec  1 00:32:52 2022
+++ src/sys/dev/tprof/tprof_x86_intel.c	Wed Jun 21 22:34:51 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof_x86_intel.c,v 1.5 2022/12/01 00:32:52 ryo Exp $ */
+/* $NetBSD: tprof_x86_intel.c,v 1.5.2.1 2023/06/21 22:34:51 martin Exp $ */
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -56,7 +56,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tprof_x86_intel.c,v 1.5 2022/12/01 00:32:52 ryo Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tprof_x86_intel.c,v 1.5.2.1 2023/06/21 22:34:51 martin Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -79,7 +79,6 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_x86_in
 #include <machine/i82489reg.h>
 #include <machine/i82489var.h>
 
-#define	NCTRS	4	/* XXX */
 static u_int counter_bitwidth;
 
 #define	PERFEVTSEL(i)		(MSR_EVNTSEL0 + (i))
@@ -102,24 +101,34 @@ static nmi_handler_t *intel_nmi_handle;
 static uint32_t
 tprof_intel_ncounters(void)
 {
-	return NCTRS;
+	uint32_t descs[4];
+
+	if (cpuid_level < 0x0a)
+		return 0;
+
+	x86_cpuid(0x0a, descs);
+
+	return __SHIFTOUT(descs[0], CPUID_PERF_NGPPC);
 }
 
 static u_int
 tprof_intel_counter_bitwidth(u_int counter)
 {
+
 	return counter_bitwidth;
 }
 
 static inline void
 tprof_intel_counter_write(u_int counter, uint64_t val)
 {
+
 	wrmsr(PERFCTR(counter), val);
 }
 
 static inline uint64_t
 tprof_intel_counter_read(u_int counter)
 {
+
 	return rdmsr(PERFCTR(counter));
 }
 
@@ -136,7 +145,7 @@ tprof_intel_configure_event(u_int counte
 	    PERFEVTSEL_INT;
 	wrmsr(PERFEVTSEL(counter), evtval);
 
-	/* reset the counter */
+	/* Reset the counter */
 	tprof_intel_counter_write(counter, param->p_value);
 }
 
@@ -160,7 +169,8 @@ tprof_intel_stop(tprof_countermask_t sto
 	while ((bit = ffs(stopmask)) != 0) {
 		bit--;
 		CLR(stopmask, __BIT(bit));
-		wrmsr(PERFEVTSEL(bit), rdmsr(PERFEVTSEL(bit)) & ~PERFEVTSEL_EN);
+		wrmsr(PERFEVTSEL(bit), rdmsr(PERFEVTSEL(bit)) &
+		    ~PERFEVTSEL_EN);
 	}
 }
 
@@ -185,13 +195,13 @@ tprof_intel_nmi(const struct trapframe *
 			continue;	/* not overflowed */
 
 		if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) {
-			/* account for the counter, and reset */
+			/* Account for the counter, and reset */
 			tprof_intel_counter_write(bit,
 			    sc->sc_count[bit].ctr_counter_reset_val);
 			counters_offset[bit] +=
 			    sc->sc_count[bit].ctr_counter_val + ctr;
 
-			/* record a sample */
+			/* Record a sample */
#if defined(__x86_64__)
 			tfi.tfi_pc = tf->tf_rip;
 #else
@@ -201,12 +211,12 @@ tprof_intel_nmi(const struct trapframe *
 			tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS;
 			tprof_sample(NULL, &tfi);
 		} else {
-			/* not profiled, but require to consider overflow */
+			/* Not profiled, but require to consider overflow */
 			counters_offset[bit] += __BIT(counter_bitwidth);
 		}
 	}
 
-	/* unmask PMI */
+	/* Unmask PMI */
 	pcint = lapic_readreg(LAPIC_LVT_PCINT);
 	KASSERT((pcint & LAPIC_LVT_MASKED) != 0);
 	lapic_writereg(LAPIC_LVT_PCINT, pcint & ~LAPIC_LVT_MASKED);
@@ -217,6 +227,7 @@ tprof_intel_nmi(const struct trapframe *
 static uint64_t
 tprof_intel_counter_estimate_freq(u_int counter)
 {
+
 	return curcpu()->ci_data.cpu_cc_freq;
 }
 
@@ -225,20 +236,18 @@ tprof_intel_ident(void)
 {
 	uint32_t descs[4];
 
-	if (cpu_vendor != CPUVENDOR_INTEL) {
+	if (cpu_vendor != CPUVENDOR_INTEL)
 		return TPROF_IDENT_NONE;
-	}
 
-	if (cpuid_level < 0x0A) {
+	if (cpuid_level < 0x0a)
 		return TPROF_IDENT_NONE;
-	}
-	x86_cpuid(0x0A, descs);
-	if ((descs[0] & CPUID_PERF_VERSION) == 0) {
+
+	x86_cpuid(0x0a, descs);
+	if ((descs[0] & CPUID_PERF_VERSION) == 0)
 		return TPROF_IDENT_NONE;
-	}
-	if ((descs[0] & CPUID_PERF_NGPPC) == 0) {
+
+	if ((descs[0] & CPUID_PERF_NGPPC) == 0)
 		return TPROF_IDENT_NONE;
-	}
 
 	counter_bitwidth = __SHIFTOUT(descs[0], CPUID_PERF_NBWGPPC);
 
@@ -267,9 +276,8 @@ tprof_intel_establish(tprof_backend_soft
 {
 	uint64_t xc;
 
-	if (tprof_intel_ident() == TPROF_IDENT_NONE) {
+	if (tprof_intel_ident() == TPROF_IDENT_NONE)
 		return ENOTSUP;
-	}
 
 	KASSERT(intel_nmi_handle == NULL);
 	intel_nmi_handle =
 	    nmi_establish(tprof_intel_nmi, sc);

Index: src/usr.sbin/tprof/tprof.8
diff -u src/usr.sbin/tprof/tprof.8:1.24 src/usr.sbin/tprof/tprof.8:1.24.2.1
--- src/usr.sbin/tprof/tprof.8:1.24	Fri Dec 16 08:02:04 2022
+++ src/usr.sbin/tprof/tprof.8	Wed Jun 21 22:34:51 2023
@@ -1,4 +1,4 @@
-.\" $NetBSD: tprof.8,v 1.24 2022/12/16 08:02:04 ryo Exp $
+.\" $NetBSD: tprof.8,v 1.24.2.1 2023/06/21 22:34:51 martin Exp $
 .\"
 .\" Copyright (c)2011 YAMAMOTO Takashi,
 .\" All rights reserved.
@@ -24,7 +24,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd December 16, 2022
+.Dd April 17, 2023
 .Dt TPROF 8
 .Os
 .Sh NAME
@@ -37,7 +37,9 @@
 .Sh DESCRIPTION
 The
 .Nm
-tool can be used to monitor hardware events (PMCs) during the execution of
+tool can be used to monitor hardware events
+.Tn ( PMC Ns s )
+during the execution of
 certain commands.
 .Pp
 The
@@ -48,11 +50,8 @@ keeps recording samples from the kernel
 and reports statistics to the standard error.
 .Pp
 The
-.Nm tprof
-pseudo driver and a suitable backend should be loaded beforehand.
-See
 .Xr tprof 4
-for the details.
+pseudo driver and a suitable backend should be loaded beforehand.
 .Pp
 The
 .Nm
@@ -61,28 +60,51 @@ The first argument,
 .Ar op ,
 specifies the action to take.
 Valid actions are:
-.Bl -tag -width offline -offset indent
-.It list
-Display a list of performance counter events available on the system.
-.It monitor Xo
-.Fl e
-.Ar name[:option][,scale]
+.Bl -tag -width Cm
+.
+.It Cm list
+.
+Display the following information:
+.Bl -bullet -compact
+.It
+a list of performance counter events available on the system
+.It
+the maximum number of counters that can be used simultaneously
+.It
+the default counter for
+.Cm monitor
+and
+.Cm top
+commands
+.El
+.
+.It Cm monitor Xo
+.Op Fl e Ar name\| Ns Oo Cm \&: Ns Ar option\^ Oc Ns Oo Cm \&, Ns Ar scale\^ Oc
 .Op Fl e Ar ...
 .Op Fl o Ar outfile
 .Ar command
 .Xc
-Monitor the execution of command
+.
+Monitor the execution of
 .Ar command .
+The
 .Ar name
-specifies the name of the event to count; it must be taken from the list of
+specifies the event to count; it must be taken from the list of
 available events.
 .Ar option
 specifies the source of the event; it must be a combination of
-.Ar u
+.Cm u
 (userland) and
-.Ar k
-(kernel). If omitted, it is assumed that both are specified.
-The collected samples are written into the file
+.Cm k
+(kernel).
+If omitted, it is assumed that both are specified.
+Multiple
+.Fl e
+arguments can be specified.
+If none of the
+.Fl e
+arguments are specified, the CPU's default counter is used.
+.Pp
 .Ar scale
 specifies the ratio of the speed to the cycle counter, or the counter until
 overflow.
@@ -91,38 +113,43 @@ speed of the cycle counter by default, b
 too large (counter increasing too slowly) to be sufficient for profiling.
 For example, to specify an event that increases about 1000 times slower
 than the cycle counter, specify
-.Dq Pa -e event,1000 .
+.Ql -e event,1000 .
 Also, if
-.Dq Pa -e event,=200
+.Ql -e event,=200
 is specified, profiling is performed every time the counter is increased
 by 200.
+.Pp
+The collected samples are written into the file
 .Ar outfile
 if specified.
 The default is
-.Dq Pa tprof.out .
-.It count Xo
-.Fl e
-.Ar name[:option]
+.Pa tprof.out .
+.
+.It Cm count Xo
+.Fl e Ar name\| Ns Op Cm \&: Ns Ar option
 .Op Fl e Ar ...
 .Op Fl i Ar interval
 .Ar command
 .Xc
+.
 Same as
-.Ar monitor ,
+.Cm monitor ,
 but does not do any profiling, only outputs counters every
 .Ar interval
 second.
-.It analyze Xo
+.
+.It Cm analyze Xo
 .Op Fl CkLPs
 .Op Fl p Ar pid
 .Ar file
 .Xc
+.
 Analyze the samples produced by a previous run of
-.Nm tprof ,
+.Nm ,
 stored in
 .Ar file ,
 and generate a plain text representation of them.
-.Bl -tag -width XPXpidXX -offset indent
+.Bl -tag -width Fl
 .It Fl C
 Don't distinguish CPUs.
 All samples are treated as its CPU number is 0.
@@ -142,27 +169,28 @@ and ignore the rest.
 .It Fl s
 Per symbol.
 .El
-.It top Xo
-.Oo
-.Fl e
-.Ar name[,scale]
+.
+.It Cm top Xo
+.Op Fl acu
+.Op Fl e Ar name\| Ns Oo Cm \&, Ns Ar scale\^ Oc
 .Op Fl e Ar ...
-.Oc
 .Op Fl i Ar interval
-.Op Fl acu
 .Xc
+.
 Displays profiling results in real-time.
 .Ar name
 specifies the name of the event to count.
-.Bl -tag -width XXintervalX -offset indent
-.It Fl i Ar interval
-set the update interval in seconds. The default value is 1.
+.Bl -tag -width Fl
 .It Fl a
-Starts in accumulation mode. The display is updated every
+Starts in accumulation mode.
+The display is updated every
 .Ar interval
 second, but the values are accumulative.
 .It Fl c
-show the delta of the event counters.
+Show the delta of the event counters.
+.It Fl i Ar interval
+Set the update interval in seconds.
+The default value is 1.
 .It Fl u
 Userland processes are also included in the profiling.
 .El
@@ -172,23 +200,27 @@ While
 .Ar top
 is running, it accepts commands from the terminal.
 These commands are currently recognized:
-.Bl -tag -width XXcommandsX -offset indent
-.It Ic a
+.Bl -tag -width Ic
+.It Aq Ic a
 toggle accumurative mode.
-.It Ic c
+.It Aq Ic c
 shows/hides the event counters.
-.It Ic q
+.It Aq Ic q
 quit
 .Nm .
-.It Ic z
+.It Aq Ic z
 clear accumulated data.
 .El
 .El
 .Sh EXAMPLES
 The following command profiles the system during 20 seconds and writes the
-samples into the file myfile.out.
+samples into the file
+.Pa myfile.out .
+.Pp
 .Dl # tprof monitor -e llc-misses:k -o myfile.out sleep 20
+.Pp
 The following command displays the results of the sampling.
+.Pp
 .Dl # tprof analyze myfile.out
 .Sh SUPPORT
 The following CPU models are supported:
@@ -208,7 +240,7 @@ x86 AMD Family 19h
 .It
 x86 Intel Generic (all Intel CPUs)
 .It
-x86 Intel Skylake/Kabylake
+x86 Intel Skylake, Kabylake and Cometlake
 .It
 x86 Intel Silvermont/Airmont
 .It
@@ -220,7 +252,7 @@ x86 Intel Goldmont Plus
 The
 .Nm
 utility reports the following statistics about the activities of the
-.Nm tprof
+.Xr tprof 4
 pseudo driver.
 .Bl -tag -width dropbuf_samples
 .It sample

Index: src/usr.sbin/tprof/tprof.c
diff -u src/usr.sbin/tprof/tprof.c:1.18.2.1 src/usr.sbin/tprof/tprof.c:1.18.2.2
--- src/usr.sbin/tprof/tprof.c:1.18.2.1	Mon Dec 26 11:23:56 2022
+++ src/usr.sbin/tprof/tprof.c	Wed Jun 21 22:34:51 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof.c,v 1.18.2.1 2022/12/26 11:23:56 martin Exp $ */
+/* $NetBSD: tprof.c,v 1.18.2.2 2023/06/21 22:34:51 martin Exp $ */
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -57,7 +57,7 @@
 
 #include <sys/cdefs.h>
 #ifndef lint
-__RCSID("$NetBSD: tprof.c,v 1.18.2.1 2022/12/26 11:23:56 martin Exp $");
+__RCSID("$NetBSD: tprof.c,v 1.18.2.2 2023/06/21 22:34:51 martin Exp $");
 #endif /* not lint */
 
 #include <sys/atomic.h>
@@ -260,7 +260,12 @@ process_stat(void *arg)
 static void
 tprof_list(int argc, char **argv)
 {
-	printf("%u events can be counted at the same time\n", ncounters);
+	const char *defaultevent = tprof_cycle_event_name();
+
+	printf("%u events can be counted at the same time.\n", ncounters);
+	if (defaultevent != NULL)
+		printf("The default counter for monitor and top command is "
+		    "\"%s\".\n", defaultevent);
 	tprof_event_list();
 }
@@ -356,6 +361,29 @@ tprof_parse_event(tprof_param_t *param,
 	return error;
 }
 
+const char *
+tprof_cycle_event_name(void)
+{
+	const char *cycleevent;
+
+	switch (tprof_info.ti_ident) {
+	case TPROF_IDENT_INTEL_GENERIC:
+		cycleevent = "unhalted-core-cycles";
+		break;
+	case TPROF_IDENT_AMD_GENERIC:
+		cycleevent = "LsNotHaltedCyc";
+		break;
+	case TPROF_IDENT_ARMV8_GENERIC:
+	case TPROF_IDENT_ARMV7_GENERIC:
+		cycleevent = "CPU_CYCLES";
+		break;
+	default:
+		cycleevent = NULL;
+		break;
+	}
+	return cycleevent;
+}
+
 static void
 tprof_monitor_common(bool do_profile, int argc, char **argv)
 {
@@ -404,8 +432,17 @@ tprof_monitor_common(bool do_profile, in
 	}
 	argc -= optind;
 	argv += optind;
-	if (argc == 0 || nevent == 0) {
+	if (argc == 0)
 		usage();
+	if (nevent == 0) {
+		const char *defaultevent = tprof_cycle_event_name();
+		if (defaultevent == NULL)
+			errx(EXIT_FAILURE, "cpu not supported");
+
+		tprof_event_lookup(defaultevent, &params[nevent]);
+		eventname[nevent] = defaultevent;
+		params[nevent].p_flags |= TPROF_PARAM_KERN;
+		nevent++;
 	}
 
 	if (do_profile) {

Index: src/usr.sbin/tprof/tprof.h
diff -u src/usr.sbin/tprof/tprof.h:1.4 src/usr.sbin/tprof/tprof.h:1.4.2.1
--- src/usr.sbin/tprof/tprof.h:1.4	Fri Dec 16 08:02:04 2022
+++ src/usr.sbin/tprof/tprof.h	Wed Jun 21 22:34:51 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof.h,v 1.4 2022/12/16 08:02:04 ryo Exp $ */
+/* $NetBSD: tprof.h,v 1.4.2.1 2023/06/21 22:34:51 martin Exp $ */
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -34,6 +34,7 @@ extern int ncpu;
 extern int devfd;
 extern u_int ncounters;
 
+const char *tprof_cycle_event_name(void);
 int tprof_event_init(uint32_t);
 void tprof_event_list(void);
 void tprof_event_lookup(const char *, struct tprof_param *);

Index: src/usr.sbin/tprof/tprof_top.c
diff -u src/usr.sbin/tprof/tprof_top.c:1.7.2.1 src/usr.sbin/tprof/tprof_top.c:1.7.2.2
--- src/usr.sbin/tprof/tprof_top.c:1.7.2.1	Sat Dec 24 17:17:27 2022
+++ src/usr.sbin/tprof/tprof_top.c	Wed Jun 21 22:34:51 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof_top.c,v 1.7.2.1 2022/12/24 17:17:27 martin Exp $ */
+/* $NetBSD: tprof_top.c,v 1.7.2.2 2023/06/21 22:34:51 martin Exp $ */
 
 /*-
 * Copyright (c) 2022 Ryo Shimizu <r...@nerv.org>
@@ -28,7 +28,7 @@
 
 #include <sys/cdefs.h>
 #ifndef lint
-__RCSID("$NetBSD: tprof_top.c,v 1.7.2.1 2022/12/24 17:17:27 martin Exp $");
+__RCSID("$NetBSD: tprof_top.c,v 1.7.2.2 2023/06/21 22:34:51 martin Exp $");
 #endif /* not lint */
 
 #include <sys/param.h>
@@ -118,29 +118,6 @@ static uint64_t *sample_n_per_event_cpu[
 static uint64_t *counters;		/* counters[2][ncpu][nevent] */
 static u_int counters_i;
 
-static const char *
-cycle_event_name(void)
-{
-	const char *cycleevent;
-
-	switch (tprof_info.ti_ident) {
-	case TPROF_IDENT_INTEL_GENERIC:
-		cycleevent = "unhalted-core-cycles";
-		break;
-	case TPROF_IDENT_AMD_GENERIC:
-		cycleevent = "LsNotHaltedCyc";
-		break;
-	case TPROF_IDENT_ARMV8_GENERIC:
-	case TPROF_IDENT_ARMV7_GENERIC:
-		cycleevent = "CPU_CYCLES";
-		break;
-	default:
-		cycleevent = NULL;
-		break;
-	}
-	return cycleevent;
-}
-
 static void
 reset_cursor_pos(void)
 {
@@ -978,7 +955,7 @@ tprof_top(int argc, char **argv)
 		tprof_top_usage();
 
 	if (nevent == 0) {
-		const char *defaultevent = cycle_event_name();
+		const char *defaultevent = tprof_cycle_event_name();
 		if (defaultevent == NULL)
 			die_errc(EXIT_FAILURE, 0, "cpu not supported");

Index: src/usr.sbin/tprof/arch/tprof_x86.c
diff -u src/usr.sbin/tprof/arch/tprof_x86.c:1.15 src/usr.sbin/tprof/arch/tprof_x86.c:1.15.2.1
--- src/usr.sbin/tprof/arch/tprof_x86.c:1.15	Thu Dec  8 05:29:27 2022
+++ src/usr.sbin/tprof/arch/tprof_x86.c	Wed Jun 21 22:34:51 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof_x86.c,v 1.15 2022/12/08 05:29:27 msaitoh Exp $ */
+/* $NetBSD: tprof_x86.c,v 1.15.2.1 2023/06/21 22:34:51 martin Exp $ */
 
 /*
 * Copyright (c) 2018-2019 The NetBSD Foundation, Inc.
@@ -60,7 +60,8 @@ struct event_table {
 
 static struct event_table *cpuevents = NULL;
 
-static void x86_cpuid(unsigned int *eax, unsigned int *ebx,
+static void
+x86_cpuid(unsigned int *eax, unsigned int *ebx,
     unsigned int *ecx, unsigned int *edx)
 {
 	asm volatile("cpuid"
@@ -429,7 +430,7 @@ static struct name_to_event intel_skylak
 	{ "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",	0x60, 0x04, true },
 	{ "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",	0x60, 0x08, true },
 	{ "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
-	    0x60, 0x10, true },
+						0x60, 0x10, true },
 	{ "IDQ.MITE_UOPS",				0x79, 0x04, true },
 	{ "IDQ.DSB_UOPS",				0x79, 0x08, true },
 	{ "IDQ.MS_MITE_UOPS",				0x79, 0x20, true },
@@ -571,6 +572,8 @@ init_intel_generic(void)
 	case 0x5e:	/* Skylake */
 	case 0x8e:	/* Kabylake */
 	case 0x9e:	/* Kabylake */
+	case 0xa5:	/* Cometlake */
+	case 0xa6:	/* Cometlake */
 		table->next = init_intel_skylake_kabylake();
 		break;
 	}
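For illustration, the command lines below are the editor's example, not
part of the commit. After this pullup, "tprof monitor" and "tprof top"
fall back to the CPU's default cycle counter when no -e argument is given,
and "tprof list" reports the maximum number of simultaneous counters plus
the default counter name (e.g. unhalted-core-cycles on Intel):

	# tprof list
	# tprof monitor -o tprof.out sleep 20

With no -e argument, monitor profiles with the default counter in kernel
mode only, since tprof_monitor_common() sets the TPROF_PARAM_KERN flag
for the fallback event.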