The map->data buffers preserve map->base profiling data for writing to disk. AIO map->cblocks queue the corresponding map->data buffers for asynchronous writing. The map->cblocks objects are located in the last page of each map->data buffer.
Signed-off-by: Alexey Budankov <alexey.budan...@linux.intel.com> --- Changes in v7: - implemented handling record.aio setting from perfconfig file Changes in v6: - adjusted setting of priorities for cblocks; Changes in v5: - reshaped layout of data structures; - implemented --aio option; Changes in v4: - converted mmap()/munmap() to malloc()/free() for mmap->data buffer management Changes in v2: - converted zalloc() to calloc() for allocation of mmap_aio array, - cleared typo and adjusted fallback branch code; --- tools/perf/builtin-record.c | 15 ++++++++++++- tools/perf/perf.h | 1 + tools/perf/util/evlist.c | 7 +++--- tools/perf/util/evlist.h | 3 ++- tools/perf/util/mmap.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/mmap.h | 6 ++++- 6 files changed, 79 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 22ebeb92ac51..f17a6f9cb1ba 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -326,7 +326,8 @@ static int record__mmap_evlist(struct record *rec, if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, opts->auxtrace_mmap_pages, - opts->auxtrace_snapshot_mode) < 0) { + opts->auxtrace_snapshot_mode, + opts->nr_cblocks) < 0) { if (errno == EPERM) { pr_err("Permission error mapping pages.\n" "Consider increasing " @@ -1287,6 +1288,8 @@ static int perf_record_config(const char *var, const char *value, void *cb) var = "call-graph.record-mode"; return perf_default_config(var, value, cb); } + if (!strcmp(var, "record.aio")) + rec->opts.nr_cblocks = strtol(value, NULL, 0); return 0; } @@ -1519,6 +1522,7 @@ static struct record record = { .default_per_cpu = true, }, .proc_map_timeout = 500, + .nr_cblocks = 2 }, .tool = { .sample = process_sample_event, @@ -1678,6 +1682,8 @@ static struct option __record_options[] = { "signal"), OPT_BOOLEAN(0, "dry-run", &dry_run, "Parse options then exit"), + OPT_INTEGER(0, "aio", &record.opts.nr_cblocks, + "asynchronous trace write 
operations (min: 1, max: 32, default: 2)"), OPT_END() }; @@ -1870,6 +1876,13 @@ int cmd_record(int argc, const char **argv) goto out; } + if (!(1 <= rec->opts.nr_cblocks && rec->opts.nr_cblocks <= 32)) + rec->opts.nr_cblocks = 2; + + if (verbose > 0) + pr_info("AIO trace writes: %d\n", rec->opts.nr_cblocks); + + err = __cmd_record(&record, argc, argv); out: perf_evlist__delete(rec->evlist); diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 21bf7f5a3cf5..0a1ae2ae567a 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -82,6 +82,7 @@ struct record_opts { bool use_clockid; clockid_t clockid; unsigned int proc_map_timeout; + int nr_cblocks; }; struct option; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e7a4b31a84fb..08be79650a85 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1018,7 +1018,8 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, */ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, - bool auxtrace_overwrite) + bool auxtrace_overwrite, + int nr_cblocks) { struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; @@ -1028,7 +1029,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, * Its value is decided by evsel's write_backward. * So &mp should not be passed through const pointer. 
*/ - struct mmap_params mp; + struct mmap_params mp = { .nr_cblocks = nr_cblocks }; if (!evlist->mmap) evlist->mmap = perf_evlist__alloc_mmap(evlist, false); @@ -1060,7 +1061,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) { - return perf_evlist__mmap_ex(evlist, pages, 0, false); + return perf_evlist__mmap_ex(evlist, pages, 0, false, 2); } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index dc66436add98..a94d3c613254 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -162,7 +162,8 @@ unsigned long perf_event_mlock_kb_in_pages(void); int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, - bool auxtrace_overwrite); + bool auxtrace_overwrite, + int nr_cblocks); int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); void perf_evlist__munmap(struct perf_evlist *evlist); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index fc832676a798..384d17cd1379 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -155,6 +155,14 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb void perf_mmap__munmap(struct perf_mmap *map) { + int i; + if (map->data) { + for (i = 0; i < map->nr_cblocks; ++i) + zfree(&(map->data[i])); + zfree(&(map->data)); + } + if (map->cblocks) + zfree(&(map->cblocks)); if (map->base != NULL) { munmap(map->base, perf_mmap__mmap_len(map)); map->base = NULL; @@ -166,6 +174,7 @@ void perf_mmap__munmap(struct perf_mmap *map) int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd) { + int i; /* * The last one will be done at perf_mmap__consume(), so that we * make sure we don't prevent tools from consuming every last event in @@ -190,6 +199,50 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params 
*mp, int fd) map->base = NULL; return -1; } + map->nr_cblocks = mp->nr_cblocks; + map->cblocks = calloc(map->nr_cblocks, sizeof(struct aiocb*)); + if (!map->cblocks) { + pr_debug2("failed to allocate perf event data buffers, error %d\n", + errno); + return -1; + } + map->data = calloc(map->nr_cblocks, sizeof(void*)); + if (map->data) { + int delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX); + for (i = 0; i < map->nr_cblocks; ++i) { + map->data[i] = malloc(perf_mmap__mmap_len(map)); + if (map->data[i]) { + int prio; + unsigned char *data = map->data[i]; + map->cblocks[i] = (struct aiocb *)&data[map->mask + 1]; + memset(map->cblocks[i], 0, sizeof(struct aiocb)); + /* Use cblock.aio_fildes value different from -1 + * to denote started aio write operation on the + * cblock so it requires explicit record__aio_sync() + * call prior the cblock may be reused again. + */ + map->cblocks[i]->aio_fildes = -1; + /* Allocate cblocks with decreasing priority to + * have faster aio_write() calls because queued + * requests are kept in separate per-prio queues + * and adding a new request iterates thru shorter + * per-prio list. 
+ */ + prio = delta_max - i; + if (prio < 0) + prio = 0; + map->cblocks[i]->aio_reqprio = prio; + } else { + pr_debug2("failed to allocate perf event data buffer, error %d\n", + errno); + return -1; + } + } + } else { + pr_debug2("failed to alloc perf event data buffers, error %d\n", + errno); + return -1; + } map->fd = fd; if (auxtrace_mmap__mmap(&map->auxtrace_mmap, diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index d82294db1295..4a9bb0ecae4f 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -6,6 +6,7 @@ #include <linux/types.h> #include <asm/barrier.h> #include <stdbool.h> +#include <aio.h> #include "auxtrace.h" #include "event.h" @@ -25,6 +26,9 @@ struct perf_mmap { bool overwrite; struct auxtrace_mmap auxtrace_mmap; char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + void **data; + struct aiocb **cblocks; + int nr_cblocks; }; /* @@ -56,7 +60,7 @@ enum bkw_mmap_state { }; struct mmap_params { - int prot, mask; + int prot, mask, nr_cblocks; struct auxtrace_mmap_params auxtrace_mp; };