Thiago Jung Bauermann [bauer...@linux.vnet.ibm.com] wrote: > POWER9 introduces a new version of the hypervisor API to access the 24x7 > perf counters. The new version changed some of the structures used for > requests and results. > > Signed-off-by: Thiago Jung Bauermann <bauer...@linux.vnet.ibm.com> > --- > arch/powerpc/perf/hv-24x7.c | 145 > +++++++++++++++++++++++++++------ > arch/powerpc/perf/hv-24x7.h | 59 ++++++++++++-- > arch/powerpc/platforms/pseries/Kconfig | 2 +- > 3 files changed, 173 insertions(+), 33 deletions(-) > > diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c > index 043cbc78be98..95c44f1d2fd2 100644 > --- a/arch/powerpc/perf/hv-24x7.c > +++ b/arch/powerpc/perf/hv-24x7.c > @@ -18,6 +18,7 @@ > #include <linux/slab.h> > #include <linux/vmalloc.h> > > +#include <asm/cputhreads.h> > #include <asm/firmware.h> > #include <asm/hvcall.h> > #include <asm/io.h> > @@ -27,6 +28,9 @@ > #include "hv-24x7-catalog.h" > #include "hv-common.h" > > +/* Version of the 24x7 hypervisor API that we should use in this machine. */ > +static int interface_version; > + > static bool domain_is_valid(unsigned domain) > { > switch (domain) { > @@ -74,7 +78,11 @@ static const char *domain_name(unsigned domain) > > static bool catalog_entry_domain_is_valid(unsigned domain) > { > - return is_physical_domain(domain); > + /* POWER8 doesn't support virtual domains. */ > + if (interface_version == 1) > + return is_physical_domain(domain); > + else > + return domain_is_valid(domain); > } > > /* > @@ -166,9 +174,12 @@ DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw); > DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); > DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); > > -#define MAX_NUM_REQUESTS ((H24x7_DATA_BUFFER_SIZE - \ > +#define MAX_NUM_REQUESTS_V1 ((H24x7_DATA_BUFFER_SIZE - \ > + sizeof(struct hv_24x7_request_buffer)) \ > + / H24x7_REQUEST_SIZE_V1) > +#define MAX_NUM_REQUESTS_V2 ((H24x7_DATA_BUFFER_SIZE - \ > sizeof(struct hv_24x7_request_buffer)) \ > - / sizeof(struct hv_24x7_request)) > + / H24x7_REQUEST_SIZE_V2)
Nit: Can we define MAX_NUM_REQUESTS(version) - with a version parameter ? It will... > > static char *event_name(struct hv_24x7_event_data *ev, int *len) > { > @@ -1052,7 +1063,7 @@ static void init_24x7_request(struct > hv_24x7_request_buffer *request_buffer, > memset(request_buffer, 0, H24x7_DATA_BUFFER_SIZE); > memset(result_buffer, 0, H24x7_DATA_BUFFER_SIZE); > > - request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT; > + request_buffer->interface_version = interface_version; > /* memset above set request_buffer->num_requests to 0 */ > } > > @@ -1077,7 +1088,7 @@ static int make_24x7_request(struct > hv_24x7_request_buffer *request_buffer, > if (ret) { > struct hv_24x7_request *req; > > - req = &request_buffer->requests[0]; > + req = request_buffer->requests; > pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => ret > 0x%lx (%ld) detail=0x%x failing ix=%x\n", > req->performance_domain, req->data_offset, > req->starting_ix, req->starting_lpar_ix, > @@ -1101,9 +1112,13 @@ static int add_event_to_24x7_request(struct perf_event > *event, > { > u16 idx; > int i; > + size_t req_size; > struct hv_24x7_request *req; > > - if (request_buffer->num_requests >= MAX_NUM_REQUESTS) { > + if ((request_buffer->interface_version == 1 > + && request_buffer->num_requests >= MAX_NUM_REQUESTS_V1) > + || (request_buffer->interface_version > 1 > + && request_buffer->num_requests >= MAX_NUM_REQUESTS_V2)) { > pr_devel("Too many requests for 24x7 HCALL %d\n", ...simplify this check to if (request->buffer->num_requests >= MAX_NUM_REQUESTS(version)) > request_buffer->num_requests); > return -EINVAL; > @@ -1120,8 +1135,11 @@ static int add_event_to_24x7_request(struct perf_event > *event, > idx = event_get_vcpu(event); > } > > + req_size = request_buffer->interface_version == 1 ? > + H24x7_REQUEST_SIZE_V1 : H24x7_REQUEST_SIZE_V2; > + Maybe similarly, with H24x7_REQUEST_SIZE(version) ? > i = request_buffer->num_requests++; > - req = &request_buffer->requests[i]; > + req = (void *) request_buffer->requests + i * req_size; > > req->performance_domain = event_get_domain(event); > req->data_size = cpu_to_be16(8); > @@ -1131,14 +1149,97 @@ static int add_event_to_24x7_request(struct > perf_event *event, > req->starting_ix = cpu_to_be16(idx); > req->max_ix = cpu_to_be16(1); > > + if (request_buffer->interface_version > 1 && > + req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) { > + req->starting_thread_group_ix = idx % 2; > + req->max_num_thread_groups = 1; > + } > + > return 0; > } > > +/** > + * get_count_from_result - get event count from the given result > + * > + * @event: Event associated with @res. > + * @resb: Result buffer containing @res. > + * @res: Result to work on. > + * @countp: Output variable containing the event count. > + * @next: Optional output variable pointing to the next result in @resb. > + */ > +static int get_count_from_result(struct perf_event *event, > + struct hv_24x7_data_result_buffer *resb, > + struct hv_24x7_result *res, u64 *countp, > + struct hv_24x7_result **next) > +{ > + u16 num_elements = be16_to_cpu(res->num_elements_returned); > + u16 data_size = be16_to_cpu(res->result_element_data_size); > + unsigned int data_offset; > + void *element_data; > + int ret = 0; > + > + /* > + * We can bail out early if the result is empty. > + */ > + if (!num_elements) { > + pr_debug("Result of request %hhu is empty, nothing to do\n", > + res->result_ix); > + > + if (next) > + *next = (struct hv_24x7_result *) res->elements; > + > + return -ENODATA; > + } > + > + /* > + * This code assumes that a result has only one element. > + */ > + if (num_elements != 1) { > + pr_debug("Error: result of request %hhu has %hu elements\n", > + res->result_ix, num_elements); Could this happen due to an user request or would this indicate a bug in the way we submitted the request (perf should submit separate request for each lpar/index - we set ->max_ix and ->max_num_lpars to cpu_be16(1). Minor inconsistency with proceeding, is that if the next element passes, this return code is lost/over written. IOW, h_24x7_event_commit_txn()'s return value depends on the last element we process, even if intermediate ones encounter an error. > + > + if (!next) > + return -ENOTSUPP; > + > + /* > + * We still need to go through the motions so that we can return > + * a pointer to the next result. > + */ > + ret = -ENOTSUPP; > + } > + > + if (data_size != sizeof(u64)) { > + pr_debug("Error: result of request %hhu has data of %hu > bytes\n", > + res->result_ix, data_size); > + > + if (!next) > + return -ENOTSUPP; > + > + ret = -ENOTSUPP; > + } > + > + if (resb->interface_version == 1) > + data_offset = offsetof(struct hv_24x7_result_element_v1, > + element_data); > + else > + data_offset = offsetof(struct hv_24x7_result_element_v2, > + element_data); > + > + element_data = res->elements + data_offset; > + > + if (!ret) > + *countp = be64_to_cpu(*((u64 *) element_data)); > + > + /* The next result is after the result element. */ > + if (next) > + *next = element_data + data_size; > + > + return ret; > +} > + > static int single_24x7_request(struct perf_event *event, u64 *count) > { > int ret; > - u16 num_elements; > - struct hv_24x7_result *result; > struct hv_24x7_request_buffer *request_buffer; > struct hv_24x7_data_result_buffer *result_buffer; > > @@ -1158,14 +1259,9 @@ static int single_24x7_request(struct perf_event > *event, u64 *count) > if (ret) > goto out; > > - result = result_buffer->results; > - > - /* This code assumes that a result has only one element. */ > - num_elements = be16_to_cpu(result->num_elements_returned); > - WARN_ON_ONCE(num_elements != 1); > - > /* process result from hcall */ > - *count = be64_to_cpu(result->elements[0].element_data[0]); > + ret = get_count_from_result(event, result_buffer, > + result_buffer->results, count, NULL); > > out: > put_cpu_var(hv_24x7_reqb); > @@ -1425,16 +1521,13 @@ static int h_24x7_event_commit_txn(struct pmu *pmu) > for (i = 0, res = result_buffer->results; > i < result_buffer->num_results; i++, res = next_res) { > struct perf_event *event = h24x7hw->events[res->result_ix]; > - u16 num_elements = be16_to_cpu(res->num_elements_returned); > - u16 data_size = be16_to_cpu(res->result_element_data_size); > > - /* This code assumes that a result has only one element. */ > - WARN_ON_ONCE(num_elements != 1); > + ret = get_count_from_result(event, result_buffer, res, &count, > + &next_res); > + if (ret) > + continue; > > - count = be64_to_cpu(res->elements[0].element_data[0]); > update_event_count(event, count); > - > - next_res = (void *) res->elements[0].element_data + data_size; > } > > put_cpu_var(hv_24x7_hw); > @@ -1486,6 +1579,12 @@ static int hv_24x7_init(void) > return -ENODEV; > } > > + /* POWER8 only supports v1, while POWER9 only supports v2. */ > + if (cpu_has_feature(CPU_FTR_ARCH_300)) > + interface_version = 2; > + else > + interface_version = 1; > + > hret = hv_perf_caps_get(&caps); > if (hret) { > pr_debug("could not obtain capabilities, not enabling, > rc=%ld\n", > diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h > index b95909400b2a..149af6e9538f 100644 > --- a/arch/powerpc/perf/hv-24x7.h > +++ b/arch/powerpc/perf/hv-24x7.h > @@ -10,6 +10,9 @@ enum hv_perf_domains { > HV_PERF_DOMAIN_MAX, > }; > > +#define H24x7_REQUEST_SIZE_V1 16 > +#define H24x7_REQUEST_SIZE_V2 32 > + > struct hv_24x7_request { > /* PHYSICAL domains require enabling via phyp/hmc. */ > __u8 performance_domain; > @@ -42,19 +45,47 @@ struct hv_24x7_request { > /* chip, core, or virtual processor based on @performance_domain */ > __be16 starting_ix; > __be16 max_ix; > + > + /* The following fields were added in v2 of the 24x7 interface. */ > + > + __u8 starting_thread_group_ix; > + > + /* -1 means all thread groups starting at @starting_thread_group_ix */ > + __u8 max_num_thread_groups; > + > + __u8 reserved2[0xE]; > } __packed; > > struct hv_24x7_request_buffer { > /* 0 - ? */ > /* 1 - ? */ > -#define HV_24X7_IF_VERSION_CURRENT 0x01 > __u8 interface_version; > __u8 num_requests; > __u8 reserved[0xE]; > - struct hv_24x7_request requests[1]; > + struct hv_24x7_request requests[]; > +} __packed; > + > +struct hv_24x7_result_element_v1 { > + __be16 lpar_ix; > + > + /* > + * represents the core, chip, or virtual processor based on the > + * request's @performance_domain > + */ > + __be16 domain_ix; > + > + /* -1 if @performance_domain does not refer to a virtual processor */ > + __be32 lpar_cfg_instance_id; > + > + /* size = @result_element_data_size of containing result. */ > + __u64 element_data[]; > } __packed; > > -struct hv_24x7_result_element { > +/* > + * We need a separate struct for v2 because the offset of @element_data > changed > + * between versions. > + */ > +struct hv_24x7_result_element_v2 { > __be16 lpar_ix; > > /* > @@ -66,8 +97,12 @@ struct hv_24x7_result_element { > /* -1 if @performance_domain does not refer to a virtual processor */ > __be32 lpar_cfg_instance_id; > > + __u8 thread_group_ix; > + > + __u8 reserved[7]; > + > /* size = @result_element_data_size of containing result. */ > - __u64 element_data[1]; > + __u64 element_data[]; > } __packed; > > struct hv_24x7_result { > @@ -94,10 +129,16 @@ struct hv_24x7_result { > __be16 result_element_data_size; > __u8 reserved[0x2]; > > - /* WARNING: only valid for first result element due to variable sizes > - * of result elements */ > - /* struct hv_24x7_result_element[@num_elements_returned] */ > - struct hv_24x7_result_element elements[1]; > + /* > + * Either > + * struct hv_24x7_result_element_v1[@num_elements_returned] > + * or > + * struct hv_24x7_result_element_v2[@num_elements_returned] > + * > + * depending on the interface_version field of the > + * struct hv_24x7_data_result_buffer containing this result. > + */ > + char elements[]; > } __packed; > > struct hv_24x7_data_result_buffer { > @@ -113,7 +154,7 @@ struct hv_24x7_data_result_buffer { > __u8 reserved2[0x8]; > /* WARNING: only valid for the first result due to variable sizes of > * results */ > - struct hv_24x7_result results[1]; /* [@num_results] */ > + struct hv_24x7_result results[]; /* [@num_results] */ > } __packed; > > #endif > diff --git a/arch/powerpc/platforms/pseries/Kconfig > b/arch/powerpc/platforms/pseries/Kconfig > index 913c54e23eea..3a6dfd14f64b 100644 > --- a/arch/powerpc/platforms/pseries/Kconfig > +++ b/arch/powerpc/platforms/pseries/Kconfig > @@ -124,7 +124,7 @@ config HV_PERF_CTRS > Enable access to hypervisor supplied counters in perf. Currently, > this enables code that uses the hcall GetPerfCounterInfo and 24x7 > interfaces to retrieve counters. GPCI exists on Power 6 and later > - systems. 24x7 is available on Power 8 systems. > + systems. 24x7 is available on Power 8 and later systems. > > If unsure, select Y. > > -- > 2.7.4