On Mon, Jun 29, 2020 at 12:17 PM Anup Patel <a...@brainfault.org> wrote: > > On Mon, Jun 29, 2020 at 8:49 AM Zong Li <zong...@sifive.com> wrote: > > > > Add support for raw events and hardware cache events. Currently, we set > > the events by writing the mhpmeventN CSRs, it would raise an illegal > > instruction exception and trap into m-mode to emulate event selector > > CSRs access. It doesn't make sense because we shouldn't write the > > m-mode CSRs in s-mode, it would be better that set events through SBI > > call or the shadow CSRs of s-mode. We would change it later. > > > > Signed-off-by: Zong Li <zong...@sifive.com> > > --- > > arch/riscv/include/asm/perf_event.h | 65 ++++++--- > > arch/riscv/kernel/perf_event.c | 204 +++++++++++++++++++++++----- > > 2 files changed, 215 insertions(+), 54 deletions(-) > > > > diff --git a/arch/riscv/include/asm/perf_event.h > > b/arch/riscv/include/asm/perf_event.h > > index 062efd3a1d5d..41d515a1f331 100644 > > --- a/arch/riscv/include/asm/perf_event.h > > +++ b/arch/riscv/include/asm/perf_event.h > > @@ -14,39 +14,64 @@ > > > > #ifdef CONFIG_RISCV_BASE_PMU > > #define RISCV_BASE_COUNTERS 2 > > +#define RISCV_EVENT_COUNTERS 29 > > Same comment as DT documentation related to naming.
Change it as well. Thanks. > > Regards, > Anup > > > > +#define RISCV_TOTAL_COUNTERS (RISCV_BASE_COUNTERS + RISCV_EVENT_COUNTERS) > > > > /* > > - * The RISCV_MAX_COUNTERS parameter should be specified. > > - */ > > - > > -#define RISCV_MAX_COUNTERS 2 > > - > > -/* > > - * These are the indexes of bits in counteren register *minus* 1, > > - * except for cycle. It would be coherent if it can directly mapped > > - * to counteren bit definition, but there is a *time* register at > > - * counteren[1]. Per-cpu structure is scarce resource here. > > - * > > * According to the spec, an implementation can support counter up to > > * mhpmcounter31, but many high-end processors has at most 6 general > > * PMCs, we give the definition to MHPMCOUNTER8 here. > > */ > > -#define RISCV_PMU_CYCLE 0 > > -#define RISCV_PMU_INSTRET 1 > > -#define RISCV_PMU_MHPMCOUNTER3 2 > > -#define RISCV_PMU_MHPMCOUNTER4 3 > > -#define RISCV_PMU_MHPMCOUNTER5 4 > > -#define RISCV_PMU_MHPMCOUNTER6 5 > > -#define RISCV_PMU_MHPMCOUNTER7 6 > > -#define RISCV_PMU_MHPMCOUNTER8 7 > > +#define RISCV_PMU_CYCLE 0 > > +#define RISCV_PMU_INSTRET 2 > > +#define RISCV_PMU_HPMCOUNTER3 3 > > +#define RISCV_PMU_HPMCOUNTER4 4 > > +#define RISCV_PMU_HPMCOUNTER5 5 > > +#define RISCV_PMU_HPMCOUNTER6 6 > > +#define RISCV_PMU_HPMCOUNTER7 7 > > +#define RISCV_PMU_HPMCOUNTER8 8 > > + > > +#define RISCV_PMU_HPMCOUNTER_FIRST 3 > > +#define RISCV_PMU_HPMCOUNTER_LAST \ > > + (RISCV_PMU_HPMCOUNTER_FIRST + riscv_pmu->num_counters - 1) > > > > #define RISCV_OP_UNSUPP (-EOPNOTSUPP) > > > > +/* Hardware cache event encoding */ > > +#define PERF_HW_CACHE_TYPE 0 > > +#define PERF_HW_CACHE_OP 8 > > +#define PERF_HW_CACHE_RESULT 16 > > +#define PERF_HW_CACHE_MASK 0xff > > + > > +/* config_base encoding */ > > +#define RISCV_PMU_TYPE_MASK 0x3 > > +#define RISCV_PMU_TYPE_BASE 0x1 > > +#define RISCV_PMU_TYPE_EVENT 0x2 > > +#define RISCV_PMU_EXCLUDE_MASK 0xc > > +#define RISCV_PMU_EXCLUDE_USER 0x3 > > +#define RISCV_PMU_EXCLUDE_KERNEL 0x4 > > + > > +/* > > + * Currently, machine-mode supports emulation of mhpmeventN. Setting > > mhpmeventN > > + * to raise an illegal instruction exception to set event types in > > machine-mode. > > + * Eventually, we should set event types through standard SBI call or the > > shadow > > + * CSRs of supervisor-mode, because it is weird for writing CSR of > > machine-mode > > + * explicitly in supervisor-mode. These macro should be removed in the > > future. > > + */ > > +#define CSR_MHPMEVENT3 0x323 > > +#define CSR_MHPMEVENT4 0x324 > > +#define CSR_MHPMEVENT5 0x325 > > +#define CSR_MHPMEVENT6 0x326 > > +#define CSR_MHPMEVENT7 0x327 > > +#define CSR_MHPMEVENT8 0x328 > > + > > struct cpu_hw_events { > > /* # currently enabled events*/ > > int n_events; > > /* currently enabled events */ > > - struct perf_event *events[RISCV_MAX_COUNTERS]; > > + struct perf_event *events[RISCV_EVENT_COUNTERS]; > > + /* bitmap of used event counters */ > > + unsigned long used_cntr_mask; > > /* vendor-defined PMU data */ > > void *platform; > > }; > > diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c > > index c835f0362d94..0cfcd6f1e57b 100644 > > --- a/arch/riscv/kernel/perf_event.c > > +++ b/arch/riscv/kernel/perf_event.c > > @@ -139,6 +139,53 @@ static const int > > riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX] > > }, > > }; > > > > +/* > > + * Methods for checking and getting PMU information > > + */ > > + > > +static inline int is_base_counter(int idx) > > +{ > > + return (idx == RISCV_PMU_CYCLE || idx == RISCV_PMU_INSTRET); > > +} > > + > > +static inline int is_event_counter(int idx) > > +{ > > + return (idx >= RISCV_PMU_HPMCOUNTER_FIRST && > > + idx <= RISCV_PMU_HPMCOUNTER_LAST); > > +} > > + > > +static inline int get_available_counter(struct perf_event *event) > > +{ > > + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); > > + struct hw_perf_event *hwc = &event->hw; > > + unsigned long config_base = hwc->config_base & RISCV_PMU_TYPE_MASK; > > + unsigned long mask; > > + int ret; > > + > > + switch (config_base) { > > + case RISCV_PMU_TYPE_BASE: > > + ret = hwc->config; > > + if (WARN_ON_ONCE(!is_base_counter(ret))) > > + return -ENOSPC; > > + break; > > + case RISCV_PMU_TYPE_EVENT: > > + mask = ~cpuc->used_cntr_mask; > > + ret = find_next_bit(&mask, RISCV_PMU_HPMCOUNTER_LAST, 3); > > + if (WARN_ON_ONCE(!is_event_counter(ret))) > > + return -ENOSPC; > > + break; > > + default: > > + return -ENOENT; > > + } > > + > > + __set_bit(ret, &cpuc->used_cntr_mask); > > + > > + return ret; > > +} > > + > > +/* > > + * Map generic hardware event > > + */ > > static int riscv_map_hw_event(u64 config) > > { > > if (config >= riscv_pmu->max_events) > > @@ -147,32 +194,28 @@ static int riscv_map_hw_event(u64 config) > > return riscv_pmu->hw_events[config]; > > } > > > > -static int riscv_map_cache_decode(u64 config, unsigned int *type, > > - unsigned int *op, unsigned int *result) > > -{ > > - return -ENOENT; > > -} > > - > > +/* > > + * Map generic hardware cache event > > + */ > > static int riscv_map_cache_event(u64 config) > > { > > unsigned int type, op, result; > > - int err = -ENOENT; > > - int code; > > + int ret; > > > > - err = riscv_map_cache_decode(config, &type, &op, &result); > > - if (!riscv_pmu->cache_events || err) > > - return err; > > + type = (config >> PERF_HW_CACHE_TYPE) & PERF_HW_CACHE_MASK; > > + op = (config >> PERF_HW_CACHE_OP) & PERF_HW_CACHE_MASK; > > + result = (config >> PERF_HW_CACHE_RESULT) & PERF_HW_CACHE_MASK; > > > > if (type >= PERF_COUNT_HW_CACHE_MAX || > > op >= PERF_COUNT_HW_CACHE_OP_MAX || > > result >= PERF_COUNT_HW_CACHE_RESULT_MAX) > > return -EINVAL; > > > > - code = (*riscv_pmu->cache_events)[type][op][result]; > > - if (code == RISCV_OP_UNSUPP) > > + ret = riscv_cache_event_map[type][op][result]; > > + if (ret == RISCV_OP_UNSUPP) > > return -EINVAL; > > > > - return code; > > + return ret == RISCV_OP_UNSUPP ? -ENOENT : ret; > > } > > > > /* > > @@ -190,8 +233,27 @@ static inline u64 read_counter(int idx) > > case RISCV_PMU_INSTRET: > > val = csr_read(CSR_INSTRET); > > break; > > + case RISCV_PMU_HPMCOUNTER3: > > + val = csr_read(CSR_HPMCOUNTER3); > > + break; > > + case RISCV_PMU_HPMCOUNTER4: > > + val = csr_read(CSR_HPMCOUNTER4); > > + break; > > + case RISCV_PMU_HPMCOUNTER5: > > + val = csr_read(CSR_HPMCOUNTER5); > > + break; > > + case RISCV_PMU_HPMCOUNTER6: > > + val = csr_read(CSR_HPMCOUNTER6); > > + break; > > + case RISCV_PMU_HPMCOUNTER7: > > + val = csr_read(CSR_HPMCOUNTER7); > > + break; > > + case RISCV_PMU_HPMCOUNTER8: > > + val = csr_read(CSR_HPMCOUNTER8); > > This is broken for RV32 because for RV32 we have to read two > CSRs to get a counter value. Oh yes, thanks for your reminder. Add them in the next version. > > Also, for correctly reading a 64bit counter on RV32 we have > to read just like get_cycles64() does for RV32. > > static inline u64 get_cycles64(void) > { > u32 hi, lo; > > do { > hi = get_cycles_hi(); > lo = get_cycles(); > } while (hi != get_cycles_hi()); > > return ((u64)hi << 32) | lo; > } > > Regards, > Anup > > > > + break; > > default: > > - WARN_ON_ONCE(idx < 0 || idx > RISCV_MAX_COUNTERS); > > + WARN_ON_ONCE(idx < RISCV_PMU_CYCLE || > > + idx > RISCV_TOTAL_COUNTERS); > > return -EINVAL; > > } > > > > @@ -204,6 +266,68 @@ static inline void write_counter(int idx, u64 value) > > WARN_ON_ONCE(1); > > } > > > > +static inline void write_event(int idx, u64 value) > > +{ > > + /* TODO: We shouldn't write CSR of m-mode explicitly here. Ideally, > > + * it need to set the event selector by SBI call or the s-mode > > + * shadow CSRs of them. Exploit illegal instruction exception to > > + * emulate mhpmcounterN access in m-mode. > > + */ > > + switch (idx) { > > + case RISCV_PMU_HPMCOUNTER3: > > + csr_write(CSR_MHPMEVENT3, value); > > + break; > > + case RISCV_PMU_HPMCOUNTER4: > > + csr_write(CSR_MHPMEVENT4, value); > > + break; > > + case RISCV_PMU_HPMCOUNTER5: > > + csr_write(CSR_MHPMEVENT5, value); > > + break; > > + case RISCV_PMU_HPMCOUNTER6: > > + csr_write(CSR_MHPMEVENT6, value); > > + break; > > + case RISCV_PMU_HPMCOUNTER7: > > + csr_write(CSR_MHPMEVENT7, value); > > + break; > > + case RISCV_PMU_HPMCOUNTER8: > > + csr_write(CSR_MHPMEVENT8, value); > > + break; > > + default: > > + WARN_ON_ONCE(idx < RISCV_PMU_HPMCOUNTER3 || > > + idx > RISCV_TOTAL_COUNTERS); > > + return; > > + } > > +} I was also wondering if you have any suggestions about the PMU SBI extension as I mentioned in the cover letter. Currently, we set the event selectors by emulation of OpenSBI, so just write the m-mode CSRs as above. > > + > > +/* > > + * Enable and disable event counters > > + */ > > + > > +static inline void riscv_pmu_enable_event(struct perf_event *event) > > +{ > > + struct hw_perf_event *hwc = &event->hw; > > + int idx = hwc->idx; > > + > > + if (is_event_counter(idx)) > > + write_event(idx, hwc->config); > > + > > + /* > > + * Since we cannot write to counters, this serves as an > > initialization > > + * to the delta-mechanism in pmu->read(); otherwise, the delta > > would be > > + * wrong when pmu->read is called for the first time. > > + */ > > + local64_set(&hwc->prev_count, read_counter(hwc->idx)); > > +} > > + > > +static inline void riscv_pmu_disable_event(struct perf_event *event) > > +{ > > + struct hw_perf_event *hwc = &event->hw; > > + int idx = hwc->idx; > > + > > + if (is_event_counter(idx)) > > + write_event(idx, 0); > > +} > > + > > /* > > * pmu->read: read and update the counter > > * > > @@ -232,6 +356,7 @@ static void riscv_pmu_read(struct perf_event *event) > > */ > > delta = (new_raw_count - prev_raw_count) & > > ((1ULL << riscv_pmu->counter_width) - 1); > > + > > local64_add(delta, &event->count); > > /* > > * Something like local64_sub(delta, &hwc->period_left) here is > > @@ -252,6 +377,11 @@ static void riscv_pmu_stop(struct perf_event *event, > > int flags) > > { > > struct hw_perf_event *hwc = &event->hw; > > > > + if (WARN_ON_ONCE(hwc->idx == -1)) > > + return; > > + > > + riscv_pmu_disable_event(event); > > + > > WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); > > hwc->state |= PERF_HES_STOPPED; > > > > @@ -271,6 +401,9 @@ static void riscv_pmu_start(struct perf_event *event, > > int flags) > > if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) > > return; > > > > + if (WARN_ON_ONCE(hwc->idx == -1)) > > + return; > > + > > if (flags & PERF_EF_RELOAD) { > > WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); > > > > @@ -281,14 +414,10 @@ static void riscv_pmu_start(struct perf_event *event, > > int flags) > > } > > > > hwc->state = 0; > > - perf_event_update_userpage(event); > > > > - /* > > - * Since we cannot write to counters, this serves as an > > initialization > > - * to the delta-mechanism in pmu->read(); otherwise, the delta > > would be > > - * wrong when pmu->read is called for the first time. > > - */ > > - local64_set(&hwc->prev_count, read_counter(hwc->idx)); > > + riscv_pmu_enable_event(event); > > + > > + perf_event_update_userpage(event); > > } > > > > /* > > @@ -298,21 +427,18 @@ static int riscv_pmu_add(struct perf_event *event, > > int flags) > > { > > struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); > > struct hw_perf_event *hwc = &event->hw; > > + int count_idx; > > > > if (cpuc->n_events == riscv_pmu->num_counters) > > return -ENOSPC; > > > > - /* > > - * We don't have general conunters, so no binding-event-to-counter > > - * process here. > > - * > > - * Indexing using hwc->config generally not works, since config may > > - * contain extra information, but here the only info we have in > > - * hwc->config is the event index. > > - */ > > - hwc->idx = hwc->config; > > - cpuc->events[hwc->idx] = event; > > + count_idx = get_available_counter(event); > > + if (count_idx < 0) > > + return -ENOSPC; > > + > > cpuc->n_events++; > > + hwc->idx = count_idx; > > + cpuc->events[hwc->idx] = event; > > > > hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; > > > > @@ -330,8 +456,10 @@ static void riscv_pmu_del(struct perf_event *event, > > int flags) > > struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); > > struct hw_perf_event *hwc = &event->hw; > > > > - cpuc->events[hwc->idx] = NULL; > > cpuc->n_events--; > > + __clear_bit(hwc->idx, &cpuc->used_cntr_mask); > > + > > + cpuc->events[hwc->idx] = NULL; > > riscv_pmu->pmu->stop(event, PERF_EF_UPDATE); > > perf_event_update_userpage(event); > > } > > @@ -385,6 +513,7 @@ static int riscv_event_init(struct perf_event *event) > > { > > struct perf_event_attr *attr = &event->attr; > > struct hw_perf_event *hwc = &event->hw; > > + unsigned long config_base = 0; > > int err; > > int code; > > > > @@ -406,11 +535,17 @@ static int riscv_event_init(struct perf_event *event) > > code = riscv_pmu->map_cache_event(attr->config); > > break; > > case PERF_TYPE_RAW: > > - return -EOPNOTSUPP; > > + code = attr->config; > > + break; > > default: > > return -ENOENT; > > } > > > > + if (is_base_counter(code)) > > + config_base |= RISCV_PMU_TYPE_BASE; > > + else > > + config_base |= RISCV_PMU_TYPE_EVENT; > > + > > event->destroy = riscv_event_destroy; > > if (code < 0) { > > event->destroy(event); > > @@ -424,6 +559,7 @@ static int riscv_event_init(struct perf_event *event) > > * But since we don't have such support, later in pmu->add(), we > > just > > * use hwc->config as the index instead. > > */ > > + hwc->config_base = config_base; > > hwc->config = code; > > hwc->idx = -1; > > > > -- > > 2.27.0 > >