On Mon, Jun 15, 2020 at 05:37:53PM +0300, Alexey Budankov wrote: > > On 15.06.2020 15:30, Jiri Olsa wrote: > > On Mon, Jun 15, 2020 at 08:20:38AM +0300, Alexey Budankov wrote: > >> > >> On 08.06.2020 19:07, Jiri Olsa wrote: > >>> On Mon, Jun 08, 2020 at 12:54:31PM +0300, Alexey Budankov wrote: > >>>> > >>>> On 08.06.2020 11:43, Jiri Olsa wrote: > >>>>> On Mon, Jun 08, 2020 at 11:08:56AM +0300, Alexey Budankov wrote: > >>>>>> > >>>>>> On 05.06.2020 19:15, Alexey Budankov wrote: > >>>>>>> > >>>>>>> On 05.06.2020 14:38, Jiri Olsa wrote: > >> <SNIP> > >>>>>>> revents = fdarray_fixed_revents(array, pos); > >>>>>>> fdarray__del(array, pos); > >>>>>> > >>>>>> So how is it about just adding _revents() and _del() for fixed fds with > >>>>>> correction of retval to bool for fdarray__add()? > >>>>> > >>>>> I don't like the separation for fixed and non-fixed fds, > >>>>> why can't we make generic? > >>>> > >>>> Usage models are different but they want still to be parts of the same > >>>> class > >>>> for atomic poll(). The distinction is filterable vs. not filterable. > >>>> The distinction should be somehow provided in API. Options are: > >>>> 1. expose separate API calls like __add_nonfilterable(), > >>>> __del_nonfilterable(); > >>>> use nonfilterable quality in __filter() and __poll() and, perhaps, > >>>> other internals; > >>>> 2. extend fdarray__add(, nonfilterable) with the nonfilterable quality > >>>> use the type in __filter() and __poll() and, perhaps, other internals; > >>>> expose less API calls in comparison with option 1 > >>>> > >>>> Exposure of pos for filterable fds should be converted to bool since > >>>> currently > >>>> the returned pos can become stale and there is no way in API to check > >>>> its state. > >>>> So it could look like this: > >>>> > >>>> fdkey = fdarray__add(array, fd, events, type) > >>>> type: filterable, nonfilterable, somthing else > >>>> revents = fdarray__get_revents(fdkey); > >>>> fdarray__del(array, fdkey); > >>> > >>> I think there's solution without having filterable type, > >>> I'm not sure why you think this is needed > >>> > >>> I'm busy with other things this week, but I think I can > >>> come up with some patch early next week if needed > >> > >> Friendly reminder. > > > > hm? I believe we discussed this in here: > > https://lore.kernel.org/lkml/20200609145611.GI1558310@krava/ > > Do you want it to be implemented like in the patch posted by the link?
no idea.. looking for good solution ;-) how about switching completely to epoll? I tried and it does not look that bad there might be some loose ends (interface change), but I think this would solve our problems with fdarray I'll be able to get back to it by the end of the week, but if you want to check/finish this patch earlier go ahead jirka --- tools/lib/perf/evlist.c | 134 +++++++++++++++++------ tools/lib/perf/include/internal/evlist.h | 9 +- tools/perf/builtin-kvm.c | 8 +- tools/perf/builtin-record.c | 14 ++- 4 files changed, 120 insertions(+), 45 deletions(-) diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 6a875a0f01bb..8569cdd8bbd8 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -23,6 +23,7 @@ #include <perf/cpumap.h> #include <perf/threadmap.h> #include <api/fd/array.h> +#include <sys/epoll.h> void perf_evlist__init(struct perf_evlist *evlist) { @@ -32,7 +33,10 @@ void perf_evlist__init(struct perf_evlist *evlist) INIT_HLIST_HEAD(&evlist->heads[i]); INIT_LIST_HEAD(&evlist->entries); evlist->nr_entries = 0; - fdarray__init(&evlist->pollfd, 64); + INIT_LIST_HEAD(&evlist->poll_data); + evlist->poll_cnt = 0; + evlist->poll_act = 0; + evlist->poll_fd = -1; } static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, @@ -120,6 +124,23 @@ static void perf_evlist__purge(struct perf_evlist *evlist) evlist->nr_entries = 0; } +struct poll_data { + int fd; + void *ptr; + struct list_head list; +}; + +static void perf_evlist__exit_pollfd(struct perf_evlist *evlist) +{ + struct poll_data *data, *tmp; + + if (evlist->poll_fd != -1) + close(evlist->poll_fd); + + list_for_each_entry_safe(data, tmp, &evlist->poll_data, list) + free(data); +} + void perf_evlist__exit(struct perf_evlist *evlist) { perf_cpu_map__put(evlist->cpus); @@ -128,7 +149,7 @@ void perf_evlist__exit(struct perf_evlist *evlist) evlist->cpus = NULL; evlist->all_cpus = NULL; evlist->threads = NULL; - fdarray__exit(&evlist->pollfd); + perf_evlist__exit_pollfd(evlist); } void perf_evlist__delete(struct perf_evlist *evlist) @@ -285,56 +306,105 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) { - int nr_cpus = perf_cpu_map__nr(evlist->cpus); - int nr_threads = perf_thread_map__nr(evlist->threads); - int nfds = 0; - struct perf_evsel *evsel; - - perf_evlist__for_each_entry(evlist, evsel) { - if (evsel->system_wide) - nfds += nr_cpus; - else - nfds += nr_cpus * nr_threads; - } + int poll_fd; - if (fdarray__available_entries(&evlist->pollfd) < nfds && - fdarray__grow(&evlist->pollfd, nfds) < 0) - return -ENOMEM; + poll_fd = epoll_create1(EPOLL_CLOEXEC); + if (!poll_fd) + return -1; + evlist->poll_fd = poll_fd; return 0; } -int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, - void *ptr, short revent) +static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, + struct poll_data *data, + short revent) { - int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); + struct epoll_event *events, ev = { + .data.ptr = data, + .events = revent | EPOLLERR | EPOLLHUP, + }; + int err; + + err = epoll_ctl(evlist->poll_fd, EPOLL_CTL_ADD, data->fd, &ev); + if (err) + return err; - if (pos >= 0) { - evlist->pollfd.priv[pos].ptr = ptr; - fcntl(fd, F_SETFL, O_NONBLOCK); + events = realloc(evlist->poll_events, sizeof(ev) * evlist->poll_cnt); + if (events) { + evlist->poll_events = events; + evlist->poll_cnt++; } - return pos; + return events ? 0 : -ENOMEM; } -static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, - void *arg __maybe_unused) +int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, + void *ptr, short revent) { - struct perf_mmap *map = fda->priv[fd].ptr; + struct poll_data *data = zalloc(sizeof(*data)); + int err; - if (map) - perf_mmap__put(map); + if (!data) + return -ENOMEM; + + data->fd = fd; + data->ptr = ptr; + + err = __perf_evlist__add_pollfd(evlist, data, revent); + if (!err) + list_add_tail(&data->list, &evlist->poll_data); + + return err; } int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) { - return fdarray__filter(&evlist->pollfd, revents_and_mask, - perf_evlist__munmap_filtered, NULL); + struct epoll_event *events = evlist->poll_events; + int i, removed = 0; + + for (i = 0; i < evlist->poll_act; i++) { + if (events[i].events & revents_and_mask) { + struct poll_data *data = events[i].data.ptr; + + if (data->ptr) + perf_mmap__put(data->ptr); + + epoll_ctl(evlist->poll_fd, EPOLL_CTL_DEL, data->fd, &events[i]); + + list_del(&data->list); + free(data); + removed++; + } + } + + return evlist->poll_cnt -= removed; +} + +bool perf_evlist__pollfd_data(struct perf_evlist *evlist, int fd) +{ + int i; + + if (evlist->poll_act < 0) + return false; + + for (i = 0; i < evlist->poll_act; i++) { + struct poll_data *data = evlist->poll_events[i].data.ptr; + + if (data->fd == fd) + return true; + } + + return false; } int perf_evlist__poll(struct perf_evlist *evlist, int timeout) { - return fdarray__poll(&evlist->pollfd, timeout); + evlist->poll_act = epoll_wait(evlist->poll_fd, + evlist->poll_events, + evlist->poll_cnt, + timeout); + return evlist->poll_act; } static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, bool overwrite) @@ -593,7 +663,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, return -ENOMEM; } - if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) + if (evlist->poll_fd == -1 && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; if (perf_cpu_map__empty(cpus)) diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 74dc8c3f0b66..39b08a04b992 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -3,7 +3,6 @@ #define __LIBPERF_INTERNAL_EVLIST_H #include <linux/list.h> -#include <api/fd/array.h> #include <internal/evsel.h> #define PERF_EVLIST__HLIST_BITS 8 @@ -12,6 +11,7 @@ struct perf_cpu_map; struct perf_thread_map; struct perf_mmap_param; +struct epoll_event; struct perf_evlist { struct list_head entries; @@ -22,7 +22,11 @@ struct perf_evlist { struct perf_thread_map *threads; int nr_mmaps; size_t mmap_len; - struct fdarray pollfd; + int poll_fd; + int poll_cnt; + int poll_act; + struct epoll_event *poll_events; + struct list_head poll_data; struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; struct perf_mmap *mmap; struct perf_mmap *mmap_ovw; @@ -124,4 +128,5 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu, int thread, int fd); +bool perf_evlist__pollfd_data(struct perf_evlist *evlist, int fd); #endif /* __LIBPERF_INTERNAL_EVLIST_H */ diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 95a77058023e..decc75745395 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -940,7 +940,7 @@ static int perf_kvm__handle_stdin(void) static int kvm_events_live_report(struct perf_kvm_stat *kvm) { - int nr_stdin, ret, err = -EINVAL; + int ret, err = -EINVAL; struct termios save; /* live flag must be set first */ @@ -971,8 +971,7 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm) if (evlist__add_pollfd(kvm->evlist, kvm->timerfd) < 0) goto out; - nr_stdin = evlist__add_pollfd(kvm->evlist, fileno(stdin)); - if (nr_stdin < 0) + if (evlist__add_pollfd(kvm->evlist, fileno(stdin))) goto out; if (fd_set_nonblock(fileno(stdin)) != 0) @@ -982,7 +981,6 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm) evlist__enable(kvm->evlist); while (!done) { - struct fdarray *fda = &kvm->evlist->core.pollfd; int rc; rc = perf_kvm__mmap_read(kvm); @@ -993,7 +991,7 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm) if (err) goto out; - if (fda->entries[nr_stdin].revents & POLLIN) + if (perf_evlist__pollfd_data(&kvm->evlist->core, fileno(stdin))) done = perf_kvm__handle_stdin(); if (!rc && !done) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e108d90ae2ed..a49bf4186aab 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1576,12 +1576,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) status = -1; goto out_delete_session; } - err = evlist__add_pollfd(rec->evlist, done_fd); - if (err < 0) { - pr_err("Failed to add wakeup eventfd to poll list\n"); - status = err; - goto out_delete_session; - } #endif // HAVE_EVENTFD_SUPPORT session->header.env.comp_type = PERF_COMP_ZSTD; @@ -1624,6 +1618,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) } session->header.env.comp_mmap_len = session->evlist->core.mmap_len; +#ifdef HAVE_EVENTFD_SUPPORT + err = evlist__add_pollfd(rec->evlist, done_fd); + if (err < 0) { + pr_err("Failed to add wakeup eventfd to poll list\n"); + goto out_child; + } +#endif // HAVE_EVENTFD_SUPPORT + if (rec->opts.kcore) { err = record__kcore_copy(&session->machines.host, data); if (err) { -- 2.25.4