Add support for raw events and hardware cache events. Currently we set an
event by writing the corresponding mhpmeventN CSR; this raises an illegal
instruction exception and traps into M-mode, which emulates the event
selector CSR access. This is not how it should work, because S-mode should
not be writing M-mode CSRs at all; it would be better to set events through
an SBI call or through S-mode shadow CSRs. We will change this later.
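For reference, once this is in place a raw event can be requested from
userspace in the usual perf way. The sketch below is not part of the patch,
and the raw selector value 0x1 is only a placeholder (real values depend on
the platform's mhpmeventN encoding). Hardware cache events use the standard
perf encoding, type | (op << 8) | (result << 16), which is what the new
PERF_HW_CACHE_* macros decode.

/*
 * Minimal userspace sketch: open one raw event on the current task,
 * run some workload, and read back the count.
 */
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x1;	/* placeholder raw event selector */
	attr.disabled = 1;

	/* perf_event_open() has no glibc wrapper; go through syscall(). */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == (ssize_t)sizeof(count))
		printf("raw event count: %llu\n", (unsigned long long)count);

	close(fd);
	return 0;
}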
Signed-off-by: Zong Li <zong...@sifive.com>
---
 arch/riscv/include/asm/perf_event.h |  65 ++++++---
 arch/riscv/kernel/perf_event.c      | 204 +++++++++++++++++++++++-----
 2 files changed, 215 insertions(+), 54 deletions(-)

diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
index 062efd3a1d5d..41d515a1f331 100644
--- a/arch/riscv/include/asm/perf_event.h
+++ b/arch/riscv/include/asm/perf_event.h
@@ -14,39 +14,64 @@
 #ifdef CONFIG_RISCV_BASE_PMU
 #define RISCV_BASE_COUNTERS	2
 
+#define RISCV_EVENT_COUNTERS	29
+#define RISCV_TOTAL_COUNTERS	(RISCV_BASE_COUNTERS + RISCV_EVENT_COUNTERS)
+
 /*
- * The RISCV_MAX_COUNTERS parameter should be specified.
- */
-
-#define RISCV_MAX_COUNTERS	2
-
-/*
- * These are the indexes of bits in counteren register *minus* 1,
- * except for cycle. It would be coherent if it can directly mapped
- * to counteren bit definition, but there is a *time* register at
- * counteren[1]. Per-cpu structure is scarce resource here.
- *
  * According to the spec, an implementation can support counter up to
  * mhpmcounter31, but many high-end processors has at most 6 general
  * PMCs, we give the definition to MHPMCOUNTER8 here.
  */
-#define RISCV_PMU_CYCLE		0
-#define RISCV_PMU_INSTRET	1
-#define RISCV_PMU_MHPMCOUNTER3	2
-#define RISCV_PMU_MHPMCOUNTER4	3
-#define RISCV_PMU_MHPMCOUNTER5	4
-#define RISCV_PMU_MHPMCOUNTER6	5
-#define RISCV_PMU_MHPMCOUNTER7	6
-#define RISCV_PMU_MHPMCOUNTER8	7
+#define RISCV_PMU_CYCLE			0
+#define RISCV_PMU_INSTRET		2
+#define RISCV_PMU_HPMCOUNTER3		3
+#define RISCV_PMU_HPMCOUNTER4		4
+#define RISCV_PMU_HPMCOUNTER5		5
+#define RISCV_PMU_HPMCOUNTER6		6
+#define RISCV_PMU_HPMCOUNTER7		7
+#define RISCV_PMU_HPMCOUNTER8		8
+
+#define RISCV_PMU_HPMCOUNTER_FIRST	3
+#define RISCV_PMU_HPMCOUNTER_LAST	\
+	(RISCV_PMU_HPMCOUNTER_FIRST + riscv_pmu->num_counters - 1)
 
 #define RISCV_OP_UNSUPP		(-EOPNOTSUPP)
 
+/* Hardware cache event encoding */
+#define PERF_HW_CACHE_TYPE	0
+#define PERF_HW_CACHE_OP	8
+#define PERF_HW_CACHE_RESULT	16
+#define PERF_HW_CACHE_MASK	0xff
+
+/* config_base encoding */
+#define RISCV_PMU_TYPE_MASK		0x3
+#define RISCV_PMU_TYPE_BASE		0x1
+#define RISCV_PMU_TYPE_EVENT		0x2
+#define RISCV_PMU_EXCLUDE_MASK		0xc
+#define RISCV_PMU_EXCLUDE_USER		0x4
+#define RISCV_PMU_EXCLUDE_KERNEL	0x8
+
+/*
+ * Machine mode currently emulates mhpmeventN accesses: writing mhpmeventN
+ * traps and M-mode sets the event type for us.  Eventually event types
+ * should be set through a standard SBI call or S-mode shadow CSRs, since
+ * writing M-mode CSRs from S-mode is wrong.  These macros should be
+ * removed in the future.
+ */
+#define CSR_MHPMEVENT3		0x323
+#define CSR_MHPMEVENT4		0x324
+#define CSR_MHPMEVENT5		0x325
+#define CSR_MHPMEVENT6		0x326
+#define CSR_MHPMEVENT7		0x327
+#define CSR_MHPMEVENT8		0x328
+
 struct cpu_hw_events {
 	/* # currently enabled events*/
 	int			n_events;
 	/* currently enabled events */
-	struct perf_event	*events[RISCV_MAX_COUNTERS];
+	struct perf_event	*events[RISCV_TOTAL_COUNTERS];
+	/* bitmap of used event counters */
+	unsigned long		used_cntr_mask;
 	/* vendor-defined PMU data */
 	void			*platform;
 };
diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
index c835f0362d94..0cfcd6f1e57b 100644
--- a/arch/riscv/kernel/perf_event.c
+++ b/arch/riscv/kernel/perf_event.c
@@ -139,6 +139,53 @@ static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
 	},
 };
 
+/*
+ * Methods for checking and getting PMU information
+ */
+
+static inline int is_base_counter(int idx)
+{
+	return (idx == RISCV_PMU_CYCLE || idx == RISCV_PMU_INSTRET);
+}
+
+static inline int is_event_counter(int idx)
+{
+	return (idx >= RISCV_PMU_HPMCOUNTER_FIRST &&
+		idx <= RISCV_PMU_HPMCOUNTER_LAST);
+}
+
+static inline int get_available_counter(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned long config_base = hwc->config_base & RISCV_PMU_TYPE_MASK;
+	unsigned long mask;
+	int ret;
+
+	switch (config_base) {
+	case RISCV_PMU_TYPE_BASE:
+		ret = hwc->config;
+		if (WARN_ON_ONCE(!is_base_counter(ret)))
+			return -ENOSPC;
+		break;
+	case RISCV_PMU_TYPE_EVENT:
+		mask = ~cpuc->used_cntr_mask;
+		ret = find_next_bit(&mask, RISCV_PMU_HPMCOUNTER_LAST + 1, 3);
+		if (WARN_ON_ONCE(!is_event_counter(ret)))
+			return -ENOSPC;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	__set_bit(ret, &cpuc->used_cntr_mask);
+
+	return ret;
+}
+
+/*
+ * Map generic hardware event
+ */
 static int riscv_map_hw_event(u64 config)
 {
 	if (config >= riscv_pmu->max_events)
@@ -147,32 +194,28 @@ static int riscv_map_hw_event(u64 config)
 	return riscv_pmu->hw_events[config];
 }
 
-static int riscv_map_cache_decode(u64 config, unsigned int *type,
-				  unsigned int *op, unsigned int *result)
-{
-	return -ENOENT;
-}
-
+/*
+ * Map generic hardware cache event
+ */
 static int riscv_map_cache_event(u64 config)
 {
 	unsigned int type, op, result;
-	int err = -ENOENT;
-	int code;
+	int ret;
 
-	err = riscv_map_cache_decode(config, &type, &op, &result);
-	if (!riscv_pmu->cache_events || err)
-		return err;
+	type = (config >> PERF_HW_CACHE_TYPE) & PERF_HW_CACHE_MASK;
+	op = (config >> PERF_HW_CACHE_OP) & PERF_HW_CACHE_MASK;
+	result = (config >> PERF_HW_CACHE_RESULT) & PERF_HW_CACHE_MASK;
 
 	if (type >= PERF_COUNT_HW_CACHE_MAX ||
 	    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
 	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
 		return -EINVAL;
 
-	code = (*riscv_pmu->cache_events)[type][op][result];
-	if (code == RISCV_OP_UNSUPP)
+	ret = riscv_cache_event_map[type][op][result];
+	if (ret == RISCV_OP_UNSUPP)
 		return -EINVAL;
 
-	return code;
+	return ret;
 }
 
 /*
@@ -190,8 +233,27 @@ static inline u64 read_counter(int idx)
 	case RISCV_PMU_INSTRET:
 		val = csr_read(CSR_INSTRET);
 		break;
+	case RISCV_PMU_HPMCOUNTER3:
+		val = csr_read(CSR_HPMCOUNTER3);
+		break;
+	case RISCV_PMU_HPMCOUNTER4:
+		val = csr_read(CSR_HPMCOUNTER4);
+		break;
+	case RISCV_PMU_HPMCOUNTER5:
+		val = csr_read(CSR_HPMCOUNTER5);
+		break;
+	case RISCV_PMU_HPMCOUNTER6:
+		val = csr_read(CSR_HPMCOUNTER6);
+		break;
+	case RISCV_PMU_HPMCOUNTER7:
+		val = csr_read(CSR_HPMCOUNTER7);
+		break;
+	case RISCV_PMU_HPMCOUNTER8:
+		val = csr_read(CSR_HPMCOUNTER8);
+		break;
 	default:
-		WARN_ON_ONCE(idx < 0 || idx > RISCV_MAX_COUNTERS);
+		WARN_ON_ONCE(idx < RISCV_PMU_CYCLE ||
+			     idx > RISCV_TOTAL_COUNTERS);
 		return -EINVAL;
 	}
 
@@ -204,6 +266,68 @@ static inline void write_counter(int idx, u64 value)
 	WARN_ON_ONCE(1);
 }
 
+static inline void write_event(int idx, u64 value)
+{
+	/*
+	 * TODO: We shouldn't write M-mode CSRs explicitly here.  Ideally,
+	 * the event selector would be set through an SBI call or through
+	 * the S-mode shadow CSRs.  For now, exploit the illegal instruction
+	 * exception to emulate mhpmeventN access in M-mode.
+	 */
+	switch (idx) {
+	case RISCV_PMU_HPMCOUNTER3:
+		csr_write(CSR_MHPMEVENT3, value);
+		break;
+	case RISCV_PMU_HPMCOUNTER4:
+		csr_write(CSR_MHPMEVENT4, value);
+		break;
+	case RISCV_PMU_HPMCOUNTER5:
+		csr_write(CSR_MHPMEVENT5, value);
+		break;
+	case RISCV_PMU_HPMCOUNTER6:
+		csr_write(CSR_MHPMEVENT6, value);
+		break;
+	case RISCV_PMU_HPMCOUNTER7:
+		csr_write(CSR_MHPMEVENT7, value);
+		break;
+	case RISCV_PMU_HPMCOUNTER8:
+		csr_write(CSR_MHPMEVENT8, value);
+		break;
+	default:
+		WARN_ON_ONCE(idx < RISCV_PMU_HPMCOUNTER3 ||
+			     idx > RISCV_TOTAL_COUNTERS);
+		return;
+	}
+}
+
+/*
+ * Enable and disable event counters
+ */
+
+static inline void riscv_pmu_enable_event(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (is_event_counter(idx))
+		write_event(idx, hwc->config);
+
+	/*
+	 * Since we cannot write to counters, this serves as an initialization
+	 * to the delta-mechanism in pmu->read(); otherwise, the delta would be
+	 * wrong when pmu->read is called for the first time.
+	 */
+	local64_set(&hwc->prev_count, read_counter(hwc->idx));
+}
+
+static inline void riscv_pmu_disable_event(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (is_event_counter(idx))
+		write_event(idx, 0);
+}
+
 /*
  * pmu->read: read and update the counter
  *
@@ -232,6 +356,7 @@ static void riscv_pmu_read(struct perf_event *event)
 	 */
 	delta = (new_raw_count - prev_raw_count) &
 		((1ULL << riscv_pmu->counter_width) - 1);
+
 	local64_add(delta, &event->count);
 	/*
 	 * Something like local64_sub(delta, &hwc->period_left) here is
@@ -252,6 +377,11 @@ static void riscv_pmu_stop(struct perf_event *event, int flags)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
+	if (WARN_ON_ONCE(hwc->idx == -1))
+		return;
+
+	riscv_pmu_disable_event(event);
+
 	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 	hwc->state |= PERF_HES_STOPPED;
 
@@ -271,6 +401,9 @@ static void riscv_pmu_start(struct perf_event *event, int flags)
 	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
 		return;
 
+	if (WARN_ON_ONCE(hwc->idx == -1))
+		return;
+
 	if (flags & PERF_EF_RELOAD) {
 		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
 
@@ -281,14 +414,10 @@ static void riscv_pmu_start(struct perf_event *event, int flags)
 	}
 
 	hwc->state = 0;
-	perf_event_update_userpage(event);
 
-	/*
-	 * Since we cannot write to counters, this serves as an initialization
-	 * to the delta-mechanism in pmu->read(); otherwise, the delta would be
-	 * wrong when pmu->read is called for the first time.
-	 */
-	local64_set(&hwc->prev_count, read_counter(hwc->idx));
+	riscv_pmu_enable_event(event);
+
+	perf_event_update_userpage(event);
 }
 
 /*
@@ -298,21 +427,18 @@ static int riscv_pmu_add(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
+	int count_idx;
 
 	if (cpuc->n_events == riscv_pmu->num_counters)
 		return -ENOSPC;
 
-	/*
-	 * We don't have general conunters, so no binding-event-to-counter
-	 * process here.
-	 *
-	 * Indexing using hwc->config generally not works, since config may
-	 * contain extra information, but here the only info we have in
-	 * hwc->config is the event index.
-	 */
-	hwc->idx = hwc->config;
-	cpuc->events[hwc->idx] = event;
+	count_idx = get_available_counter(event);
+	if (count_idx < 0)
+		return -ENOSPC;
+
 	cpuc->n_events++;
+	hwc->idx = count_idx;
+	cpuc->events[hwc->idx] = event;
 
 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 
@@ -330,8 +456,10 @@ static void riscv_pmu_del(struct perf_event *event, int flags)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 
-	cpuc->events[hwc->idx] = NULL;
 	cpuc->n_events--;
+	__clear_bit(hwc->idx, &cpuc->used_cntr_mask);
+
+	cpuc->events[hwc->idx] = NULL;
 	riscv_pmu->pmu->stop(event, PERF_EF_UPDATE);
 	perf_event_update_userpage(event);
 }
@@ -385,6 +513,7 @@ static int riscv_event_init(struct perf_event *event)
 {
 	struct perf_event_attr *attr = &event->attr;
 	struct hw_perf_event *hwc = &event->hw;
+	unsigned long config_base = 0;
 	int err;
 	int code;
 
@@ -406,11 +535,17 @@ static int riscv_event_init(struct perf_event *event)
 		code = riscv_pmu->map_cache_event(attr->config);
 		break;
 	case PERF_TYPE_RAW:
-		return -EOPNOTSUPP;
+		code = attr->config;
+		break;
 	default:
 		return -ENOENT;
 	}
 
+	if (is_base_counter(code))
+		config_base |= RISCV_PMU_TYPE_BASE;
+	else
+		config_base |= RISCV_PMU_TYPE_EVENT;
+
 	event->destroy = riscv_event_destroy;
 	if (code < 0) {
 		event->destroy(event);
@@ -424,6 +559,7 @@ static int riscv_event_init(struct perf_event *event)
 	 * But since we don't have such support, later in pmu->add(), we just
 	 * use hwc->config as the index instead.
 	 */
+	hwc->config_base = config_base;
 	hwc->config = code;
 	hwc->idx = -1;
 
-- 
2.27.0