Author: mmacy
Date: Mon Jun  4 01:10:23 2018
New Revision: 334595
URL: https://svnweb.freebsd.org/changeset/base/334595

Log:
  hwpmc: support sampling both kernel and user stacks when interrupted in kernel
  
  This adds the -U option to pmcstat, which will attribute in-kernel samples
  back to the user stack that invoked the system call. It is not the default,
  because when looking at kernel profiles it is generally more desirable to
  merge all instances of a given system call together.
  
  Although heavily revised, this change is directly derived from D7350 by
  Jonathan T. Looney.
  
  Obtained from: jtl
  Sponsored by: Juniper Networks, Limelight Networks

Modified:
  head/sys/dev/hwpmc/hwpmc_logging.c
  head/sys/dev/hwpmc/hwpmc_mod.c
  head/sys/kern/subr_trap.c
  head/sys/sys/pmc.h
  head/sys/sys/pmckern.h
  head/sys/sys/proc.h
  head/usr.sbin/pmcstat/pmcpl_callgraph.c
  head/usr.sbin/pmcstat/pmcstat.8
  head/usr.sbin/pmcstat/pmcstat.c

Modified: head/sys/dev/hwpmc/hwpmc_logging.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_logging.c  Sun Jun  3 23:49:02 2018        
(r334594)
+++ head/sys/dev/hwpmc/hwpmc_logging.c  Mon Jun  4 01:10:23 2018        
(r334595)
@@ -250,11 +250,9 @@ pmc_plb_rele(struct pmclog_buffer *plb)
        mtx_unlock_spin(&pmc_dom_hdrs[plb->plb_domain]->pdbh_mtx);
 }
 
-
 /*
  * Get a log buffer
  */
-
 static int
 pmclog_get_buffer(struct pmc_owner *po)
 {
@@ -345,7 +343,6 @@ pmclog_proc_ignite(void *handle, struct pmc_owner *po)
  *
  * This function is executed by each pmc owner's helper thread.
  */
-
 static void
 pmclog_loop(void *arg)
 {
@@ -846,12 +843,15 @@ pmclog_schedule_one_cond(void *arg)
 {
        struct pmc_owner *po = arg;
        struct pmclog_buffer *plb;
+       int cpu;
 
        spinlock_enter();
+       cpu = curcpu;
        /* tell hardclock not to run again */
-       if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
+       if (PMC_CPU_HAS_SAMPLES(cpu))
                PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
-       plb = po->po_curbuf[curcpu];
+       pmc_flush_samples(cpu);
+       plb = po->po_curbuf[cpu];
        if (plb && plb->plb_ptr != plb->plb_base)
                pmclog_schedule_io(po, 1);
        spinlock_exit();

Modified: head/sys/dev/hwpmc/hwpmc_mod.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_mod.c      Sun Jun  3 23:49:02 2018        
(r334594)
+++ head/sys/dev/hwpmc/hwpmc_mod.c      Mon Jun  4 01:10:23 2018        
(r334595)
@@ -207,6 +207,8 @@ static int  pmc_debugflags_parse(char *newstr, char *fe
 #endif
 
 static int     load(struct module *module, int cmd, void *arg);
+static int     pmc_add_sample(int cpu, int ring, struct pmc *pm,
+    struct trapframe *tf, int inuserspace);
 static void    pmc_add_thread_descriptors_from_proc(struct proc *p,
     struct pmc_process *pp);
 static int     pmc_attach_process(struct proc *p, struct pmc *pm);
@@ -248,6 +250,7 @@ static void pmc_process_samples(int cpu, int soft);
 static void    pmc_release_pmc_descriptor(struct pmc *pmc);
 static void    pmc_process_thread_add(struct thread *td);
 static void    pmc_process_thread_delete(struct thread *td);
+static void    pmc_process_thread_userret(struct thread *td);
 static void    pmc_remove_owner(struct pmc_owner *po);
 static void    pmc_remove_process_descriptor(struct pmc_process *pp);
 static void    pmc_restore_cpu_binding(struct pmc_binding *pb);
@@ -265,7 +268,7 @@ static int generic_switch_in(struct pmc_cpu *pc, struc
 static int generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp);
 static struct pmc_mdep *pmc_generic_cpu_initialize(void);
 static void pmc_generic_cpu_finalize(struct pmc_mdep *md);
-
+static void pmc_post_callchain_callback(void);
 /*
  * Kernel tunables and sysctl(8) interface.
  */
@@ -291,6 +294,10 @@ SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer
                                   &pmc_stats.pm_buffer_requests_failed, "# of 
buffer requests which failed");
 SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, log_sweeps, CTLFLAG_RW,
                                   &pmc_stats.pm_log_sweeps, "# of ?");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, merges, CTLFLAG_RW,
+                                  &pmc_stats.pm_merges, "# of times kernel 
stack was found for user trace");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, overwrites, CTLFLAG_RW,
+                                  &pmc_stats.pm_overwrites, "# of times a 
sample was overwritten before being logged");
 
 static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
 SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_RDTUN,
@@ -1703,6 +1710,19 @@ pmc_process_thread_delete(struct thread *td)
 }
 
 /*
+ * A userret() call for a thread.
+ */
+static void
+pmc_process_thread_userret(struct thread *td)
+{
+
+       thread_lock(td);
+       curthread->td_flags |= TDF_ASTPENDING;
+       thread_unlock(td);
+       pmc_post_callchain_callback();
+}
+
+/*
  * A mapping change for a process.
  */
 
@@ -2030,6 +2050,7 @@ const char *pmc_hooknames[] = {
        "SOFTSAMPLING",
        "THR-CREATE",
        "THR-EXIT",
+       "THR-USERRET",
 };
 #endif
 
@@ -2194,6 +2215,7 @@ pmc_hook_handler(struct thread *td, int function, void
                cpu = PCPU_GET(cpuid);
                pmc_process_samples(cpu, PMC_HR);
                pmc_process_samples(cpu, PMC_SR);
+               pmc_process_samples(cpu, PMC_UR);
                break;
 
        case PMC_FN_MMAP:
@@ -2214,6 +2236,11 @@ pmc_hook_handler(struct thread *td, int function, void
 
                pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_HR,
                    (struct trapframe *) arg);
+
+               KASSERT(td->td_pinned == 1,
+                       ("[pmc,%d] invalid td_pinned value", __LINE__));
+               sched_unpin();  /* Can migrate safely now. */
+
                td->td_pflags &= ~TDP_CALLCHAIN;
                break;
 
@@ -2223,8 +2250,18 @@ pmc_hook_handler(struct thread *td, int function, void
                 */
                KASSERT(td == curthread, ("[pmc,%d] td != curthread",
                    __LINE__));
-               pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_SR,
+
+               cpu = PCPU_GET(cpuid);
+               pmc_capture_user_callchain(cpu, PMC_SR,
                    (struct trapframe *) arg);
+               pmc_capture_user_callchain(cpu, PMC_UR,
+                   (struct trapframe *) arg);
+
+               KASSERT(td->td_pinned == 1,
+                   ("[pmc,%d] invalid td_pinned value", __LINE__));
+
+               sched_unpin();  /* Can migrate safely now. */
+
                td->td_pflags &= ~TDP_CALLCHAIN;
                break;
 
@@ -2245,6 +2282,12 @@ pmc_hook_handler(struct thread *td, int function, void
                pmc_process_thread_delete(td);
                break;
 
+       case PMC_FN_THR_USERRET:
+               KASSERT(td == curthread, ("[pmc,%d] td != curthread",
+                   __LINE__));
+               pmc_process_thread_userret(td);
+               break;
+
        default:
 #ifdef HWPMC_DEBUG
                KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function));
@@ -2658,7 +2701,9 @@ pmc_wait_for_pmc_idle(struct pmc *pm)
         * Loop (with a forced context switch) till the PMC's runcount
         * comes down to zero.
         */
+       pmclog_flush(pm->pm_owner);
        while (counter_u64_fetch(pm->pm_runcount) > 0) {
+               pmclog_flush(pm->pm_owner);
 #ifdef HWPMC_DEBUG
                maxloop--;
                KASSERT(maxloop > 0,
@@ -3264,7 +3309,7 @@ pmc_class_to_classdep(enum pmc_class class)
        return (NULL);
 }
 
-#ifdef HWPMC_DEBUG
+#if defined(HWPMC_DEBUG) && defined(KTR)
 static const char *pmc_op_to_name[] = {
 #undef __PMC_OP
 #define        __PMC_OP(N, D)  #N ,
@@ -3806,11 +3851,26 @@ pmc_syscall_handler(struct thread *td, void *syscall_a
                 */
 
                if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW |
-                   PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN)) != 0) {
+                   PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN |
+                   PMC_F_USERCALLCHAIN)) != 0) {
                        error = EINVAL;
                        break;
                }
 
+               /* PMC_F_USERCALLCHAIN is only valid with PMC_F_CALLCHAIN */
+               if ((pa.pm_flags & (PMC_F_CALLCHAIN | PMC_F_USERCALLCHAIN)) ==
+                   PMC_F_USERCALLCHAIN) {
+                       error = EINVAL;
+                       break;
+               }
+
+               /* PMC_F_USERCALLCHAIN is only valid for sampling mode */
+               if (pa.pm_flags & PMC_F_USERCALLCHAIN &&
+                       mode != PMC_MODE_TS && mode != PMC_MODE_SS) {
+                       error = EINVAL;
+                       break;
+               }
+
                /* process logging options are not allowed for system PMCs */
                if (PMC_IS_SYSTEM_MODE(mode) && (pa.pm_flags &
                    (PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT))) {
@@ -4533,8 +4593,6 @@ pmc_post_callchain_callback(void)
 }
 
 /*
- * Interrupt processing.
- *
  * Find a free slot in the per-cpu array of samples and capture the
  * current callchain there.  If a sample was successfully added, a bit
  * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook
@@ -4544,8 +4602,8 @@ pmc_post_callchain_callback(void)
  * use any of the locking primitives supplied by the OS.
  */
 
-int
-pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf,
+static int
+pmc_add_sample(int cpu, int ring, struct pmc *pm, struct trapframe *tf,
     int inuserspace)
 {
        int error, callchaindepth;
@@ -4561,7 +4619,11 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *p
        psb = pmc_pcpu[cpu]->pc_sb[ring];
 
        ps = psb->ps_write;
-       if (ps->ps_nsamples) {  /* in use, reader hasn't caught up */
+       if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
+               counter_u64_add(ps->ps_pmc->pm_runcount, -1);
+               counter_u64_add(pmc_stats.pm_overwrites, 1);
+               ps->ps_nsamples = 0;
+       } else if (ps->ps_nsamples) {   /* in use, reader hasn't caught up */
                pm->pm_pcpu_state[cpu].pps_stalled = 1;
                counter_u64_add(pmc_stats.pm_intr_bufferfull, 1);
                PMCDBG6(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d",
@@ -4573,7 +4635,6 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *p
                goto done;
        }
 
-
        /* Fill in entry. */
        PMCDBG6(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm,
            (void *) tf, inuserspace,
@@ -4619,7 +4680,11 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *p
        }
 
        ps->ps_nsamples = callchaindepth;       /* mark entry as in use */
-
+       if (ring == PMC_UR) {
+               ps->ps_nsamples_actual = callchaindepth;        /* mark entry 
as in use */
+               ps->ps_nsamples = PMC_SAMPLE_INUSE;
+       } else
+               ps->ps_nsamples = callchaindepth;       /* mark entry as in use 
*/
        /* increment write pointer, modulo ring buffer size */
        ps++;
        if (ps == psb->ps_fence)
@@ -4636,6 +4701,30 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *p
 }
 
 /*
+ * Interrupt processing.
+ *
+ * This function is meant to be called from an NMI handler.  It cannot
+ * use any of the locking primitives supplied by the OS.
+ */
+
+int
+pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf,
+    int inuserspace)
+{
+       struct thread *td;
+
+       td = curthread;
+       if ((pm->pm_flags & PMC_F_USERCALLCHAIN) &&
+               td && td->td_proc &&
+               (td->td_proc->p_flag & P_KPROC) == 0 &&
+               !inuserspace) {
+               atomic_add_int(&curthread->td_pmcpend, 1);
+               return (pmc_add_sample(cpu, PMC_UR, pm, tf, 0));
+       }
+       return (pmc_add_sample(cpu, ring, pm, tf, inuserspace));
+}
+
+/*
  * Capture a user call chain.  This function will be called from ast()
  * before control returns to userland and before the process gets
  * rescheduled.
@@ -4648,6 +4737,7 @@ pmc_capture_user_callchain(int cpu, int ring, struct t
        struct thread *td;
        struct pmc_sample *ps, *ps_end;
        struct pmc_samplebuffer *psb;
+       int nsamples, nrecords, pass;
 #ifdef INVARIANTS
        int ncallchains;
        int nfree;
@@ -4664,6 +4754,11 @@ pmc_capture_user_callchain(int cpu, int ring, struct t
        ncallchains = 0;
        nfree = 0;
 #endif
+       nrecords = INT_MAX;
+       pass = 0;
+ restart:
+       if (ring == PMC_UR)
+               nrecords = atomic_readandclear_32(&td->td_pmcpend);
 
        /*
         * Iterate through all deferred callchain requests.
@@ -4675,6 +4770,11 @@ pmc_capture_user_callchain(int cpu, int ring, struct t
        ps_end = psb->ps_write;
        do {
 #ifdef INVARIANTS
+               if (ps->ps_nsamples == PMC_SAMPLE_FREE) {
+                       nfree++;
+                       goto next;
+               }
+
                if ((ps->ps_pmc == NULL) ||
                    (ps->ps_pmc->pm_state != PMC_STATE_RUNNING))
                        nfree++;
@@ -4697,37 +4797,91 @@ pmc_capture_user_callchain(int cpu, int ring, struct t
                KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
                    ("[pmc,%d] runcount %ld", __LINE__, (unsigned 
long)counter_u64_fetch(pm->pm_runcount)));
 
+               if (ring == PMC_UR) {
+                       nsamples = ps->ps_nsamples_actual;
+                       counter_u64_add(pmc_stats.pm_merges, 1);
+               } else
+                       nsamples = 0;
+
                /*
                 * Retrieve the callchain and mark the sample buffer
                 * as 'processable' by the timer tick sweep code.
                 */
-               ps->ps_nsamples = pmc_save_user_callchain(ps->ps_pc,
-                   pmc_callchaindepth, tf);
 
-#ifdef INVARIANTS
+#ifdef INVARIANTS
                ncallchains++;
 #endif
 
+               if (__predict_true(nsamples < pmc_callchaindepth - 1))
+                       nsamples += pmc_save_user_callchain(ps->ps_pc + 
nsamples,
+                      pmc_callchaindepth - nsamples - 1, tf);
+               wmb();
+               ps->ps_nsamples = nsamples;
+               if (nrecords-- == 1)
+                       break;
 next:
                /* increment the pointer, modulo sample ring size */
                if (++ps == psb->ps_fence)
                        ps = psb->ps_samples;
        } while (ps != ps_end);
+       if (__predict_false(ring == PMC_UR && td->td_pmcpend)) {
+               if (pass == 0) {
+                       pass = 1;
+                       goto restart;
+               }
+               /* only collect samples for this part once */
+               td->td_pmcpend = 0;
+       }
 
-#ifdef INVARIANTS
-       KASSERT(ncallchains > 0 || nfree > 0,
-           ("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__,
-               cpu));
+#ifdef INVARIANTS
+       if (ring == PMC_HR)
+               KASSERT(ncallchains > 0 || nfree > 0,
+                   ("[pmc,%d] cpu %d didn't find a sample to collect", 
__LINE__,
+                           cpu));
 #endif
 
-       KASSERT(td->td_pinned == 1,
-           ("[pmc,%d] invalid td_pinned value", __LINE__));
-       sched_unpin();  /* Can migrate safely now. */
-
        /* mark CPU as needing processing */
        DPCPU_SET(pmc_sampled, 1);
 }
 
+
+static void
+pmc_flush_ring(int cpu, int ring)
+{
+       struct pmc *pm;
+       struct pmc_sample *ps;
+       struct pmc_samplebuffer *psb;
+       int n;
+
+       psb = pmc_pcpu[cpu]->pc_sb[ring];
+
+       for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */
+
+               ps = psb->ps_read;
+               if (ps->ps_nsamples == PMC_SAMPLE_FREE)
+                       goto next;
+               pm = ps->ps_pmc;
+               counter_u64_add(pm->pm_runcount, -1);
+               ps->ps_nsamples = PMC_SAMPLE_FREE;
+               /* increment read pointer, modulo sample size */
+       next:
+               if (++ps == psb->ps_fence)
+                       psb->ps_read = psb->ps_samples;
+               else
+                       psb->ps_read = ps;
+       }
+}
+
+void
+pmc_flush_samples(int cpu)
+{
+       int n;
+
+       for (n = 0; n < PMC_NUM_SR; n++)
+               pmc_flush_ring(cpu, n);
+}
+
+
 /*
  * Process saved PC samples.
  */
@@ -4788,23 +4942,20 @@ pmc_process_samples(int cpu, int ring)
                 * its owner, and if the PC is in user mode, update
                 * profiling statistics like timer-based profiling
                 * would have done.
+                *
+                * Otherwise, this is either a sampling-mode PMC that
+                * is attached to a different process than its owner,
+                * or a system-wide sampling PMC. Dispatch a log
+                * entry to the PMC's owner process.
                 */
                if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) {
                        if (ps->ps_flags & PMC_CC_F_USERSPACE) {
                                td = FIRST_THREAD_IN_PROC(po->po_owner);
                                addupc_intr(td, ps->ps_pc[0], 1);
                        }
-                       goto entrydone;
-               }
+               } else
+                       pmclog_process_callchain(pm, ps);
 
-               /*
-                * Otherwise, this is either a sampling mode PMC that
-                * is attached to a different process than its owner,
-                * or a system-wide sampling PMC.  Dispatch a log
-                * entry to the PMC's owner process.
-                */
-               pmclog_process_callchain(pm, ps);
-
        entrydone:
                ps->ps_nsamples = 0; /* mark entry as free */
                counter_u64_add(pm->pm_runcount, -1);
@@ -5282,6 +5433,8 @@ pmc_initialize(void)
        pmc_stats.pm_buffer_requests = counter_u64_alloc(M_WAITOK);
        pmc_stats.pm_buffer_requests_failed = counter_u64_alloc(M_WAITOK);
        pmc_stats.pm_log_sweeps = counter_u64_alloc(M_WAITOK);
+       pmc_stats.pm_merges = counter_u64_alloc(M_WAITOK);
+       pmc_stats.pm_overwrites = counter_u64_alloc(M_WAITOK);
 
 #ifdef HWPMC_DEBUG
        /* parse debug flags first */
@@ -5427,6 +5580,24 @@ pmc_initialize(void)
                            (n * pmc_callchaindepth);
 
                pmc_pcpu[cpu]->pc_sb[PMC_SR] = sb;
+
+               sb = malloc_domain(sizeof(struct pmc_samplebuffer) +
+                       pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain,
+                   M_WAITOK|M_ZERO);
+               sb->ps_read = sb->ps_write = sb->ps_samples;
+               sb->ps_fence = sb->ps_samples + pmc_nsamples;
+
+               KASSERT(pmc_pcpu[cpu] != NULL,
+                   ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
+
+               sb->ps_callchains = malloc_domain(pmc_callchaindepth * 
pmc_nsamples *
+                   sizeof(uintptr_t), M_PMC, domain, M_WAITOK|M_ZERO);
+
+               for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
+                       ps->ps_pc = sb->ps_callchains +
+                           (n * pmc_callchaindepth);
+
+               pmc_pcpu[cpu]->pc_sb[PMC_UR] = sb;
        }
 
        /* allocate space for the row disposition array */
@@ -5637,10 +5808,15 @@ pmc_cleanup(void)
                KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_SR] != NULL,
                    ("[pmc,%d] Null sw cpu sample buffer cpu=%d", __LINE__,
                        cpu));
+               KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_UR] != NULL,
+                   ("[pmc,%d] Null userret cpu sample buffer cpu=%d", __LINE__,
+                       cpu));
                free_domain(pmc_pcpu[cpu]->pc_sb[PMC_HR]->ps_callchains, M_PMC);
                free_domain(pmc_pcpu[cpu]->pc_sb[PMC_HR], M_PMC);
                free_domain(pmc_pcpu[cpu]->pc_sb[PMC_SR]->ps_callchains, M_PMC);
                free_domain(pmc_pcpu[cpu]->pc_sb[PMC_SR], M_PMC);
+               free_domain(pmc_pcpu[cpu]->pc_sb[PMC_UR]->ps_callchains, M_PMC);
+               free_domain(pmc_pcpu[cpu]->pc_sb[PMC_UR], M_PMC);
                free_domain(pmc_pcpu[cpu], M_PMC);
        }
 
@@ -5669,6 +5845,8 @@ pmc_cleanup(void)
        counter_u64_free(pmc_stats.pm_buffer_requests);
        counter_u64_free(pmc_stats.pm_buffer_requests_failed);
        counter_u64_free(pmc_stats.pm_log_sweeps);
+       counter_u64_free(pmc_stats.pm_merges);
+       counter_u64_free(pmc_stats.pm_overwrites);
        sx_xunlock(&pmc_sx);    /* we are done */
 }
 

Modified: head/sys/kern/subr_trap.c
==============================================================================
--- head/sys/kern/subr_trap.c   Sun Jun  3 23:49:02 2018        (r334594)
+++ head/sys/kern/subr_trap.c   Mon Jun  4 01:10:23 2018        (r334595)
@@ -145,6 +145,11 @@ userret(struct thread *td, struct trapframe *frame)
         */
        if (p->p_flag & P_PROFIL)
                addupc_task(td, TRAPF_PC(frame), td->td_pticks * psratio);
+
+#ifdef HWPMC_HOOKS
+       if (PMC_THREAD_HAS_SAMPLES(td))
+               PMC_CALL_HOOK(td, PMC_FN_THR_USERRET, NULL);
+#endif
        /*
         * Let the scheduler adjust our priority etc.
         */

Modified: head/sys/sys/pmc.h
==============================================================================
--- head/sys/sys/pmc.h  Sun Jun  3 23:49:02 2018        (r334594)
+++ head/sys/sys/pmc.h  Mon Jun  4 01:10:23 2018        (r334595)
@@ -369,13 +369,14 @@ enum pmc_ops {
 #define        PMC_F_KGMON             0x00000040 /*OP ALLOCATE kgmon(8) 
profiling */
 /* V2 API */
 #define        PMC_F_CALLCHAIN         0x00000080 /*OP ALLOCATE capture 
callchains */
+#define        PMC_F_USERCALLCHAIN     0x00000100 /*OP ALLOCATE use userspace 
stack */
 
 /* internal flags */
 #define        PMC_F_ATTACHED_TO_OWNER 0x00010000 /*attached to owner*/
 #define        PMC_F_NEEDS_LOGFILE     0x00020000 /*needs log file */
 #define        PMC_F_ATTACH_DONE       0x00040000 /*attached at least once */
 
-#define        PMC_CALLCHAIN_DEPTH_MAX 128
+#define        PMC_CALLCHAIN_DEPTH_MAX 512
 
 #define        PMC_CC_F_USERSPACE      0x01       /*userspace callchain*/
 
@@ -568,6 +569,8 @@ struct pmc_driverstats {
        counter_u64_t   pm_buffer_requests_failed; /* #failed buffer requests */
        counter_u64_t   pm_log_sweeps;          /* #sample buffer processing
                                                   passes */
+       counter_u64_t   pm_merges;              /* merged k+u */
+       counter_u64_t   pm_overwrites;          /* UR overwrites */
 };
 #endif
 
@@ -643,11 +646,11 @@ struct pmc_op_getdyneventinfo {
 
 #define        PMC_HASH_SIZE                           1024
 #define        PMC_MTXPOOL_SIZE                        2048
-#define        PMC_LOG_BUFFER_SIZE                     128
-#define        PMC_NLOGBUFFERS_PCPU            8
-#define        PMC_NSAMPLES                            64
-#define        PMC_CALLCHAIN_DEPTH                     32
-#define        PMC_THREADLIST_MAX                      64
+#define        PMC_LOG_BUFFER_SIZE                     256
+#define        PMC_NLOGBUFFERS_PCPU                    32
+#define        PMC_NSAMPLES                            256
+#define        PMC_CALLCHAIN_DEPTH                     128
+#define        PMC_THREADLIST_MAX                      128
 
 #define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "."
 
@@ -923,9 +926,9 @@ struct pmc_hw {
 
 struct pmc_sample {
        uint16_t                ps_nsamples;    /* callchain depth */
+       uint16_t                ps_nsamples_actual;
        uint16_t                ps_cpu;         /* cpu number */
        uint16_t                ps_flags;       /* other flags */
-       uint8_t                 ps_pad[2];
        lwpid_t                 ps_tid;         /* thread id */
        pid_t                   ps_pid;         /* process PID or -1 */
        struct thread           *ps_td;         /* which thread */
@@ -954,7 +957,7 @@ struct pmc_samplebuffer {
 
 struct pmc_cpu {
        uint32_t        pc_state;       /* physical cpu number + flags */
-       struct pmc_samplebuffer *pc_sb[2]; /* space for samples */
+       struct pmc_samplebuffer *pc_sb[3]; /* space for samples */
        struct pmc_hw   *pc_hwpmcs[];   /* 'npmc' pointers */
 };
 
@@ -1203,7 +1206,7 @@ MALLOC_DECLARE(M_PMC);
 struct pmc_mdep *pmc_md_initialize(void);      /* MD init function */
 void   pmc_md_finalize(struct pmc_mdep *_md);  /* MD fini function */
 int    pmc_getrowdisp(int _ri);
-int    pmc_process_interrupt(int _cpu, int _soft, struct pmc *_pm,
+int    pmc_process_interrupt(int _cpu, int _ring, struct pmc *_pm,
     struct trapframe *_tf, int _inuserspace);
 int    pmc_save_kernel_callchain(uintptr_t *_cc, int _maxsamples,
     struct trapframe *_tf);
@@ -1211,5 +1214,6 @@ int       pmc_save_user_callchain(uintptr_t *_cc, int 
_maxsa
     struct trapframe *_tf);
 struct pmc_mdep *pmc_mdep_alloc(int nclasses);
 void pmc_mdep_free(struct pmc_mdep *md);
+void pmc_flush_samples(int cpu);
 #endif /* _KERNEL */
 #endif /* _SYS_PMC_H_ */

Modified: head/sys/sys/pmckern.h
==============================================================================
--- head/sys/sys/pmckern.h      Sun Jun  3 23:49:02 2018        (r334594)
+++ head/sys/sys/pmckern.h      Mon Jun  4 01:10:23 2018        (r334595)
@@ -62,9 +62,12 @@
 #define        PMC_FN_SOFT_SAMPLING            11
 #define        PMC_FN_THR_CREATE               12
 #define        PMC_FN_THR_EXIT                 13
+#define        PMC_FN_THR_USERRET              14
 
 #define        PMC_HR  0       /* Hardware ring buffer */
 #define        PMC_SR  1       /* Software ring buffer */
+#define        PMC_UR  2       /* userret ring buffer */
+#define PMC_NUM_SR (PMC_UR+1)
 
 struct pmckern_procexec {
        int             pm_credentialschanged;
@@ -226,6 +229,9 @@ do {                                                \
 /* Check if a process is using HWPMCs.*/
 #define PMC_PROC_IS_USING_PMCS(p)                              \
        (__predict_false(p->p_flag & P_HWPMC))
+
+#define PMC_THREAD_HAS_SAMPLES(td)                             \
+       (__predict_false((td)->td_pmcpend))
 
 /* Check if a thread have pending user capture. */
 #define PMC_IS_PENDING_CALLCHAIN(p)                            \

Modified: head/sys/sys/proc.h
==============================================================================
--- head/sys/sys/proc.h Sun Jun  3 23:49:02 2018        (r334594)
+++ head/sys/sys/proc.h Mon Jun  4 01:10:23 2018        (r334595)
@@ -375,6 +375,7 @@ struct thread {
        void            *td_lkpi_task;  /* LinuxKPI task struct pointer */
        TAILQ_ENTRY(thread) td_epochq;  /* (t) Epoch queue. */
        epoch_section_t td_epoch_section; /* (t) epoch section object */
+       int             td_pmcpend;
 };
 
 struct thread0_storage {

Modified: head/usr.sbin/pmcstat/pmcpl_callgraph.c
==============================================================================
--- head/usr.sbin/pmcstat/pmcpl_callgraph.c     Sun Jun  3 23:49:02 2018        
(r334594)
+++ head/usr.sbin/pmcstat/pmcpl_callgraph.c     Mon Jun  4 01:10:23 2018        
(r334595)
@@ -345,7 +345,7 @@ pmcpl_cg_process(struct pmcstat_process *pp, struct pm
 
        pc = cc[0];
        pmcid = pmcr->pr_pmcid;
-       parent = pmcstat_cgnode_hash_lookup_pc(pp, pmcid, pc, usermode);
+       child = parent = pmcstat_cgnode_hash_lookup_pc(pp, pmcid, pc, usermode);
        if (parent == NULL) {
                pmcstat_stats.ps_callchain_dubious_frames++;
                pmcr->pr_dubious_frames++;
@@ -384,7 +384,7 @@ pmcpl_cg_process(struct pmcstat_process *pp, struct pm
                        }
                }
                if (ppm == NULL)
-                       return;
+                       continue;
 
                image = ppm->ppm_image;
                loadaddress = ppm->ppm_lowpc + image->pi_vaddr -

Modified: head/usr.sbin/pmcstat/pmcstat.8
==============================================================================
--- head/usr.sbin/pmcstat/pmcstat.8     Sun Jun  3 23:49:02 2018        
(r334594)
+++ head/usr.sbin/pmcstat/pmcstat.8     Mon Jun  4 01:10:23 2018        
(r334595)
@@ -47,7 +47,7 @@
 .Op Fl R Ar logfilename
 .Op Fl S Ar event-spec
 .Op Fl T
-.Op Fl U Ar event-spec
+.Op Fl U
 .Op Fl W
 .Op Fl a Ar pathname
 .Op Fl c Ar cpu-spec
@@ -226,8 +226,10 @@ can be used: 'c+a' switch to accumulative mode, 'c+d' 
 to delta mode, 'm' merge PMCs, 'n' change view, 'p' show next
 PMC, ' ' pause, 'q' quit. calltree only: 'f' cost under threshold
 is seen as a dot.
-.It Fl U Ar event-spec
-Provide long description of event.
+.It Fl U
+Toggle capturing user-space call traces while in kernel mode.
+The default is for sampling PMCs to capture user-space callchain information
+while in user-space mode, and kernel callchain information while in kernel 
mode.
 .It Fl W
 Toggle logging the incremental counts seen by the threads of a
 tracked process each time they are scheduled on a CPU.

Modified: head/usr.sbin/pmcstat/pmcstat.c
==============================================================================
--- head/usr.sbin/pmcstat/pmcstat.c     Sun Jun  3 23:49:02 2018        
(r334594)
+++ head/usr.sbin/pmcstat/pmcstat.c     Mon Jun  4 01:10:23 2018        
(r334595)
@@ -374,7 +374,7 @@ pmcstat_show_usage(void)
            "\t -R file\t read events from \"file\"\n"
            "\t -S spec\t allocate a system-wide sampling PMC\n"
            "\t -T\t\t start in top mode\n"
-           "\t -U spec \t provide long description of counters matching spec\n"
           "\t -U \t\t merged user/kernel stack capture\n"
            "\t -W\t\t (toggle) show counts per context switch\n"
            "\t -a file\t print sampled PCs and callgraph to \"file\"\n"
            "\t -c cpu-list\t set cpus for subsequent system-wide PMCs\n"
@@ -432,7 +432,8 @@ main(int argc, char **argv)
        int option, npmc;
        int c, check_driver_stats; 
        int do_callchain, do_descendants, do_logproccsw, do_logprocexit;
-       int do_print, do_read, do_listcounters, do_descr, do_long_descr;
+       int do_print, do_read, do_listcounters, do_descr;
+       int do_userspace;
        size_t len;
        int graphdepth;
        int pipefd[2], rfd;
@@ -455,7 +456,7 @@ main(int argc, char **argv)
        do_callchain            = 1;
        do_descr                = 0;
        do_descendants          = 0;
-       do_long_descr           = 0;
+       do_userspace            = 0;
        do_logproccsw           = 0;
        do_logprocexit          = 0;
        do_listcounters         = 0;
@@ -510,7 +511,7 @@ main(int argc, char **argv)
        CPU_COPY(&rootmask, &cpumask);
 
        while ((option = getopt(argc, argv,
-           "CD:EF:G:ILM:NO:P:R:S:TU:WZa:c:def:gi:k:l:m:n:o:p:qr:s:t:u:vw:z:")) 
!= -1)
+           "CD:EF:G:ILM:NO:P:R:S:TUWZa:c:def:gi:k:l:m:n:o:p:qr:s:t:u:vw:z:")) 
!= -1)
                switch (option) {
                case 'a':       /* Annotate + callgraph */
                        args.pa_flags |= FLAG_DO_ANNOTATE;
@@ -677,8 +678,11 @@ main(int argc, char **argv)
                                ev->ev_cpu = PMC_CPU_ANY;
 
                        ev->ev_flags = 0;
-                       if (do_callchain)
+                       if (do_callchain) {
                                ev->ev_flags |= PMC_F_CALLCHAIN;
+                               if (do_userspace)
+                                       ev->ev_flags |= PMC_F_USERCALLCHAIN;
+                       }
                        if (do_descendants)
                                ev->ev_flags |= PMC_F_DESCENDANTS;
                        if (do_logprocexit)
@@ -776,9 +780,9 @@ main(int argc, char **argv)
                        do_descr = 1;
                        event = optarg;
                        break;
-               case 'U':
-                       do_long_descr = 1;
-                       event = optarg;
+               case 'U':       /* toggle user-space callchain capture */
+                       do_userspace = !do_userspace;
+                       args.pa_required |= FLAG_HAS_SAMPLING_PMCS;
                        break;
                case 'v':       /* verbose */
                        args.pa_verbosity++;
@@ -816,17 +820,15 @@ main(int argc, char **argv)
                        break;
 
                }
-       if ((do_listcounters | do_descr | do_long_descr) &&
+       if ((do_listcounters | do_descr) &&
                pmc_pmu_enabled() == 0)
                        errx(EX_USAGE, "pmu features not supported on host or 
hwpmc not loaded");
        if (do_listcounters) {
                pmc_pmu_print_counters(NULL);
        } else if (do_descr) {
                pmc_pmu_print_counter_desc(event);
-       } else if (do_long_descr) {
-               pmc_pmu_print_counter_desc_long(event);
        }
-       if (do_listcounters | do_descr | do_long_descr)
+       if (do_listcounters | do_descr)
                exit(0);
 
        args.pa_argc = (argc -= optind);
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"

Reply via email to