Hello Suka,

Thanks for your review!
Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com> writes:

> Thiago Jung Bauermann [bauer...@linux.vnet.ibm.com] wrote:
>> @@ -166,9 +174,12 @@ DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
>>  DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
>>  DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
>>
>> -#define MAX_NUM_REQUESTS	((H24x7_DATA_BUFFER_SIZE -		\
>> +#define MAX_NUM_REQUESTS_V1	((H24x7_DATA_BUFFER_SIZE -		\
>> +				  sizeof(struct hv_24x7_request_buffer))	\
>> +				 / H24x7_REQUEST_SIZE_V1)
>> +#define MAX_NUM_REQUESTS_V2	((H24x7_DATA_BUFFER_SIZE -		\
>>  				  sizeof(struct hv_24x7_request_buffer))	\
>> -				 / sizeof(struct hv_24x7_request))
>> +				 / H24x7_REQUEST_SIZE_V2)
>
> Nit: Can we define MAX_NUM_REQUESTS(version) - with a version parameter ? It
> will...

That's a good idea. I created a function instead of a macro; I think it makes
the code clearer, since the macro would be a bit harder to read.
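(For reference, assuming H24x7_DATA_BUFFER_SIZE is still 4096 and given the
16-byte request buffer header, the helper works out to
(4096 - 16) / 16 = 255 requests per buffer for v1 and
(4096 - 16) / 32 = 127 for v2.)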
>> @@ -1101,9 +1112,13 @@ static int add_event_to_24x7_request(struct
>> perf_event *event,
>>  {
>>  	u16 idx;
>>  	int i;
>> +	size_t req_size;
>>  	struct hv_24x7_request *req;
>>
>> -	if (request_buffer->num_requests >= MAX_NUM_REQUESTS) {
>> +	if ((request_buffer->interface_version == 1
>> +	     && request_buffer->num_requests >= MAX_NUM_REQUESTS_V1)
>> +	    || (request_buffer->interface_version > 1
>> +		&& request_buffer->num_requests >= MAX_NUM_REQUESTS_V2)) {
>>  		pr_devel("Too many requests for 24x7 HCALL %d\n",
>
> ...simplify this check to
>
> 	if (request->buffer->num_requests >= MAX_NUM_REQUESTS(version))

That's better indeed.

>>  			request_buffer->num_requests);
>>  		return -EINVAL;
>> @@ -1120,8 +1135,11 @@ static int add_event_to_24x7_request(struct
>> perf_event *event,
>>  		idx = event_get_vcpu(event);
>>  	}
>>
>> +	req_size = request_buffer->interface_version == 1 ?
>> +			H24x7_REQUEST_SIZE_V1 : H24x7_REQUEST_SIZE_V2;
>> +
>
> Maybe similarly, with H24x7_REQUEST_SIZE(version) ?

I implemented this suggestion too.

>> +/**
>> + * get_count_from_result - get event count from the given result
>> + *
>> + * @event:	Event associated with @res.
>> + * @resb:	Result buffer containing @res.
>> + * @res:	Result to work on.
>> + * @countp:	Output variable containing the event count.
>> + * @next:	Optional output variable pointing to the next result in @resb.
>> + */
>> +static int get_count_from_result(struct perf_event *event,
>> +				 struct hv_24x7_data_result_buffer *resb,
>> +				 struct hv_24x7_result *res, u64 *countp,
>> +				 struct hv_24x7_result **next)
>> +{
>> +	u16 num_elements = be16_to_cpu(res->num_elements_returned);
>> +	u16 data_size = be16_to_cpu(res->result_element_data_size);
>> +	unsigned int data_offset;
>> +	void *element_data;
>> +	int ret = 0;
>> +
>> +	/*
>> +	 * We can bail out early if the result is empty.
>> +	 */
>> +	if (!num_elements) {
>> +		pr_debug("Result of request %hhu is empty, nothing to do\n",
>> +			 res->result_ix);
>> +
>> +		if (next)
>> +			*next = (struct hv_24x7_result *) res->elements;
>> +
>> +		return -ENODATA;
>> +	}
>> +
>> +	/*
>> +	 * This code assumes that a result has only one element.
>> +	 */
>> +	if (num_elements != 1) {
>> +		pr_debug("Error: result of request %hhu has %hu elements\n",
>> +			 res->result_ix, num_elements);
>
> Could this happen due to an user request or would this indicate a bug
> in the way we submitted the request (perf should submit separate request
> for each lpar/index - we set ->max_ix and ->max_num_lpars to cpu_be16(1).

That's a good question. I don't know, to be honest.

> Minor inconsistency with proceeding, is that if the next element passes,
> this return code is lost/over written. IOW, h_24x7_event_commit_txn()'s
> return value depends on the last element we process, even if intermediate
> ones encounter an error.

Good point. Would it be better if h_24x7_event_commit_txn interrupted
processing and returned an error instead of continuing?
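Or, as a middle ground, the commit_txn loop could remember the first failure
while still updating the events whose results are valid. A rough, untested
sketch on top of the patch below (rc would be a new local variable):

	for (i = 0, res = result_buffer->results;
	     i < result_buffer->num_results; i++, res = next_res) {
		struct perf_event *event = h24x7hw->events[res->result_ix];

		/* Keep walking the buffer so that next_res stays valid. */
		rc = get_count_from_result(event, result_buffer, res, &count,
					   &next_res);
		if (rc) {
			/* Remember the first failure, not the last one. */
			if (!ret)
				ret = rc;
			continue;
		}

		update_event_count(event, count);
	}

Let me know which behavior you prefer.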
The version below addresses your comments, except the one above.

Subject: [PATCH 7/8] powerpc/perf/hv-24x7: Support v2 of the hypervisor API

POWER9 introduces a new version of the hypervisor API to access the 24x7
perf counters. The new version changed some of the structures used for
requests and results.

Signed-off-by: Thiago Jung Bauermann <bauer...@linux.vnet.ibm.com>
---
 arch/powerpc/perf/hv-24x7.c            | 143 +++++++++++++++++++++++++++------
 arch/powerpc/perf/hv-24x7.h            |  58 ++++++++++---
 arch/powerpc/platforms/pseries/Kconfig |   2 +-
 3 files changed, 169 insertions(+), 34 deletions(-)

diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 043cbc78be98..35010403f191 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
+#include <asm/cputhreads.h>
 #include <asm/firmware.h>
 #include <asm/hvcall.h>
 #include <asm/io.h>
@@ -27,6 +28,9 @@
 #include "hv-24x7-catalog.h"
 #include "hv-common.h"
 
+/* Version of the 24x7 hypervisor API that we should use in this machine. */
+static int interface_version;
+
 static bool domain_is_valid(unsigned domain)
 {
 	switch (domain) {
@@ -74,7 +78,11 @@ static const char *domain_name(unsigned domain)
 
 static bool catalog_entry_domain_is_valid(unsigned domain)
 {
-	return is_physical_domain(domain);
+	/* POWER8 doesn't support virtual domains. */
+	if (interface_version == 1)
+		return is_physical_domain(domain);
+	else
+		return domain_is_valid(domain);
 }
 
 /*
@@ -166,9 +174,11 @@ DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
 DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
 DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
 
-#define MAX_NUM_REQUESTS	((H24x7_DATA_BUFFER_SIZE -		\
-				  sizeof(struct hv_24x7_request_buffer))	\
-				 / sizeof(struct hv_24x7_request))
+static unsigned int max_num_requests(int interface_version)
+{
+	return (H24x7_DATA_BUFFER_SIZE - sizeof(struct hv_24x7_request_buffer))
+		/ H24x7_REQUEST_SIZE(interface_version);
+}
 
 static char *event_name(struct hv_24x7_event_data *ev, int *len)
 {
@@ -1052,7 +1062,7 @@ static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
 	memset(request_buffer, 0, H24x7_DATA_BUFFER_SIZE);
 	memset(result_buffer, 0, H24x7_DATA_BUFFER_SIZE);
 
-	request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT;
+	request_buffer->interface_version = interface_version;
 	/* memset above set request_buffer->num_requests to 0 */
 }
 
@@ -1077,7 +1087,7 @@ static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
 	if (ret) {
 		struct hv_24x7_request *req;
 
-		req = &request_buffer->requests[0];
+		req = request_buffer->requests;
 		pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
 				      req->performance_domain, req->data_offset,
 				      req->starting_ix, req->starting_lpar_ix,
@@ -1101,9 +1111,11 @@ static int add_event_to_24x7_request(struct perf_event *event,
 {
 	u16 idx;
 	int i;
+	size_t req_size;
 	struct hv_24x7_request *req;
 
-	if (request_buffer->num_requests >= MAX_NUM_REQUESTS) {
+	if (request_buffer->num_requests >=
+			max_num_requests(request_buffer->interface_version)) {
 		pr_devel("Too many requests for 24x7 HCALL %d\n",
 			 request_buffer->num_requests);
 		return -EINVAL;
@@ -1120,8 +1132,10 @@ static int add_event_to_24x7_request(struct perf_event *event,
 		idx = event_get_vcpu(event);
 	}
 
+	req_size = H24x7_REQUEST_SIZE(request_buffer->interface_version);
+
 	i = request_buffer->num_requests++;
-	req = &request_buffer->requests[i];
+	req = (void *) request_buffer->requests + i * req_size;
 
 	req->performance_domain = event_get_domain(event);
 	req->data_size = cpu_to_be16(8);
@@ -1131,14 +1145,97 @@ static int add_event_to_24x7_request(struct perf_event *event,
 	req->starting_ix = cpu_to_be16(idx);
 	req->max_ix = cpu_to_be16(1);
 
+	if (request_buffer->interface_version > 1 &&
+	    req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
+		req->starting_thread_group_ix = idx % 2;
+		req->max_num_thread_groups = 1;
+	}
+
 	return 0;
 }
 
+/**
+ * get_count_from_result - get event count from the given result
+ *
+ * @event:	Event associated with @res.
+ * @resb:	Result buffer containing @res.
+ * @res:	Result to work on.
+ * @countp:	Output variable containing the event count.
+ * @next:	Optional output variable pointing to the next result in @resb.
+ */
+static int get_count_from_result(struct perf_event *event,
+				 struct hv_24x7_data_result_buffer *resb,
+				 struct hv_24x7_result *res, u64 *countp,
+				 struct hv_24x7_result **next)
+{
+	u16 num_elements = be16_to_cpu(res->num_elements_returned);
+	u16 data_size = be16_to_cpu(res->result_element_data_size);
+	unsigned int data_offset;
+	void *element_data;
+	int ret = 0;
+
+	/*
+	 * We can bail out early if the result is empty.
+	 */
+	if (!num_elements) {
+		pr_debug("Result of request %hhu is empty, nothing to do\n",
+			 res->result_ix);
+
+		if (next)
+			*next = (struct hv_24x7_result *) res->elements;
+
+		return -ENODATA;
+	}
+
+	/*
+	 * This code assumes that a result has only one element.
+	 */
+	if (num_elements != 1) {
+		pr_debug("Error: result of request %hhu has %hu elements\n",
+			 res->result_ix, num_elements);
+
+		if (!next)
+			return -ENOTSUPP;
+
+		/*
+		 * We still need to go through the motions so that we can return
+		 * a pointer to the next result.
+		 */
+		ret = -ENOTSUPP;
+	}
+
+	if (data_size != sizeof(u64)) {
+		pr_debug("Error: result of request %hhu has data of %hu bytes\n",
+			 res->result_ix, data_size);
+
+		if (!next)
+			return -ENOTSUPP;
+
+		ret = -ENOTSUPP;
+	}
+
+	if (resb->interface_version == 1)
+		data_offset = offsetof(struct hv_24x7_result_element_v1,
+				       element_data);
+	else
+		data_offset = offsetof(struct hv_24x7_result_element_v2,
+				       element_data);
+
+	element_data = res->elements + data_offset;
+
+	if (!ret)
+		*countp = be64_to_cpu(*((u64 *) element_data));
+
+	/* The next result is after the result element. */
+	if (next)
+		*next = element_data + data_size;
+
+	return ret;
+}
+
 static int single_24x7_request(struct perf_event *event, u64 *count)
 {
 	int ret;
-	u16 num_elements;
-	struct hv_24x7_result *result;
 	struct hv_24x7_request_buffer *request_buffer;
 	struct hv_24x7_data_result_buffer *result_buffer;
 
@@ -1158,14 +1255,9 @@ static int single_24x7_request(struct perf_event *event, u64 *count)
 	if (ret)
 		goto out;
 
-	result = result_buffer->results;
-
-	/* This code assumes that a result has only one element. */
-	num_elements = be16_to_cpu(result->num_elements_returned);
-	WARN_ON_ONCE(num_elements != 1);
-
 	/* process result from hcall */
-	*count = be64_to_cpu(result->elements[0].element_data[0]);
+	ret = get_count_from_result(event, result_buffer,
+				    result_buffer->results, count, NULL);
 
 out:
 	put_cpu_var(hv_24x7_reqb);
@@ -1425,16 +1517,13 @@ static int h_24x7_event_commit_txn(struct pmu *pmu)
 	for (i = 0, res = result_buffer->results;
 	     i < result_buffer->num_results; i++, res = next_res) {
 		struct perf_event *event = h24x7hw->events[res->result_ix];
-		u16 num_elements = be16_to_cpu(res->num_elements_returned);
-		u16 data_size = be16_to_cpu(res->result_element_data_size);
 
-		/* This code assumes that a result has only one element. */
-		WARN_ON_ONCE(num_elements != 1);
+		ret = get_count_from_result(event, result_buffer, res, &count,
+					    &next_res);
+		if (ret)
+			continue;
 
-		count = be64_to_cpu(res->elements[0].element_data[0]);
 		update_event_count(event, count);
-
-		next_res = (void *) res->elements[0].element_data + data_size;
 	}
 
 	put_cpu_var(hv_24x7_hw);
@@ -1486,6 +1575,12 @@ static int hv_24x7_init(void)
 		return -ENODEV;
 	}
 
+	/* POWER8 only supports v1, while POWER9 only supports v2. */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		interface_version = 2;
+	else
+		interface_version = 1;
+
 	hret = hv_perf_caps_get(&caps);
 	if (hret) {
 		pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h
index b95909400b2a..5092c4a222a6 100644
--- a/arch/powerpc/perf/hv-24x7.h
+++ b/arch/powerpc/perf/hv-24x7.h
@@ -10,6 +10,8 @@ enum hv_perf_domains {
 	HV_PERF_DOMAIN_MAX,
 };
 
+#define H24x7_REQUEST_SIZE(iface_version)	(iface_version == 1 ? 16 : 32)
+
 struct hv_24x7_request {
 	/* PHYSICAL domains require enabling via phyp/hmc. */
 	__u8 performance_domain;
@@ -42,19 +44,47 @@ struct hv_24x7_request {
 	/* chip, core, or virtual processor based on @performance_domain */
 	__be16 starting_ix;
 	__be16 max_ix;
+
+	/* The following fields were added in v2 of the 24x7 interface. */
+
+	__u8 starting_thread_group_ix;
+
+	/* -1 means all thread groups starting at @starting_thread_group_ix */
+	__u8 max_num_thread_groups;
+
+	__u8 reserved2[0xE];
 } __packed;
 
 struct hv_24x7_request_buffer {
 	/* 0 - ? */
 	/* 1 - ? */
-#define HV_24X7_IF_VERSION_CURRENT 0x01
 	__u8 interface_version;
 	__u8 num_requests;
 	__u8 reserved[0xE];
-	struct hv_24x7_request requests[1];
+	struct hv_24x7_request requests[];
+} __packed;
+
+struct hv_24x7_result_element_v1 {
+	__be16 lpar_ix;
+
+	/*
+	 * represents the core, chip, or virtual processor based on the
+	 * request's @performance_domain
+	 */
+	__be16 domain_ix;
+
+	/* -1 if @performance_domain does not refer to a virtual processor */
+	__be32 lpar_cfg_instance_id;
+
+	/* size = @result_element_data_size of containing result. */
+	__u64 element_data[];
 } __packed;
 
-struct hv_24x7_result_element {
+/*
+ * We need a separate struct for v2 because the offset of @element_data changed
+ * between versions.
+ */
+struct hv_24x7_result_element_v2 {
 	__be16 lpar_ix;
 
 	/*
@@ -66,8 +96,12 @@ struct hv_24x7_result_element {
 	/* -1 if @performance_domain does not refer to a virtual processor */
 	__be32 lpar_cfg_instance_id;
 
+	__u8 thread_group_ix;
+
+	__u8 reserved[7];
+
 	/* size = @result_element_data_size of containing result. */
-	__u64 element_data[1];
+	__u64 element_data[];
 } __packed;
 
 struct hv_24x7_result {
@@ -94,10 +128,16 @@ struct hv_24x7_result {
 	__be16 result_element_data_size;
 	__u8 reserved[0x2];
 
-	/* WARNING: only valid for first result element due to variable sizes
-	 * of result elements */
-	/* struct hv_24x7_result_element[@num_elements_returned] */
-	struct hv_24x7_result_element elements[1];
+	/*
+	 * Either
+	 *	struct hv_24x7_result_element_v1[@num_elements_returned]
+	 * or
+	 *	struct hv_24x7_result_element_v2[@num_elements_returned]
+	 *
+	 * depending on the interface_version field of the
+	 * struct hv_24x7_data_result_buffer containing this result.
+	 */
+	char elements[];
 } __packed;
 
 struct hv_24x7_data_result_buffer {
@@ -113,7 +153,7 @@ struct hv_24x7_data_result_buffer {
 	__u8 reserved2[0x8];
 	/* WARNING: only valid for the first result due to variable sizes of
 	 *	    results */
-	struct hv_24x7_result results[1]; /* [@num_results] */
+	struct hv_24x7_result results[]; /* [@num_results] */
 } __packed;
 
 #endif
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 913c54e23eea..3a6dfd14f64b 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -124,7 +124,7 @@ config HV_PERF_CTRS
 	  Enable access to hypervisor supplied counters in perf. Currently,
 	  this enables code that uses the hcall GetPerfCounterInfo and 24x7
 	  interfaces to retrieve counters. GPCI exists on Power 6 and later
-	  systems. 24x7 is available on Power 8 systems.
+	  systems. 24x7 is available on Power 8 and later systems.
 
 	  If unsure, select Y.

-- 
2.7.4