On Mon, Jun 15, 2020 at 06:58:04PM +0200, Jiri Olsa wrote: > On Mon, Jun 15, 2020 at 05:37:53PM +0300, Alexey Budankov wrote: > > > > On 15.06.2020 15:30, Jiri Olsa wrote: > > > On Mon, Jun 15, 2020 at 08:20:38AM +0300, Alexey Budankov wrote: > > >> > > >> On 08.06.2020 19:07, Jiri Olsa wrote: > > >>> On Mon, Jun 08, 2020 at 12:54:31PM +0300, Alexey Budankov wrote: > > >>>> > > >>>> On 08.06.2020 11:43, Jiri Olsa wrote: > > >>>>> On Mon, Jun 08, 2020 at 11:08:56AM +0300, Alexey Budankov wrote: > > >>>>>> > > >>>>>> On 05.06.2020 19:15, Alexey Budankov wrote: > > >>>>>>> > > >>>>>>> On 05.06.2020 14:38, Jiri Olsa wrote: > > >> <SNIP> > > >>>>>>> revents = fdarray_fixed_revents(array, pos); > > >>>>>>> fdarray__del(array, pos); > > >>>>>> > > >>>>>> So how is it about just adding _revents() and _del() for fixed fds > > >>>>>> with > > >>>>>> correction of retval to bool for fdarray__add()? > > >>>>> > > >>>>> I don't like the separation for fixed and non-fixed fds, > > >>>>> why can't we make generic? > > >>>> > > >>>> Usage models are different but they want still to be parts of the same > > >>>> class > > >>>> for atomic poll(). The distinction is filterable vs. not filterable. > > >>>> The distinction should be somehow provided in API. Options are: > > >>>> 1. expose separate API calls like __add_nonfilterable(), > > >>>> __del_nonfilterable(); > > >>>> use nonfilterable quality in __filter() and __poll() and, perhaps, > > >>>> other internals; > > >>>> 2. extend fdarray__add(, nonfilterable) with the nonfilterable quality > > >>>> use the type in __filter() and __poll() and, perhaps, other > > >>>> internals; > > >>>> expose less API calls in comparison with option 1 > > >>>> > > >>>> Exposure of pos for filterable fds should be converted to bool since > > >>>> currently > > >>>> the returned pos can become stale and there is no way in API to check > > >>>> its state. > > >>>> So it could look like this: > > >>>> > > >>>> fdkey = fdarray__add(array, fd, events, type) > > >>>> type: filterable, nonfilterable, somthing else > > >>>> revents = fdarray__get_revents(fdkey); > > >>>> fdarray__del(array, fdkey); > > >>> > > >>> I think there's solution without having filterable type, > > >>> I'm not sure why you think this is needed > > >>> > > >>> I'm busy with other things this week, but I think I can > > >>> come up with some patch early next week if needed > > >> > > >> Friendly reminder. > > > > > > hm? I believe we discussed this in here: > > > https://lore.kernel.org/lkml/20200609145611.GI1558310@krava/ > > > > Do you want it to be implemented like in the patch posted by the link? > > no idea.. looking for good solution ;-)
Friendly reminder. jirka > > how about switching completely to epoll? I tried and it > does not look that bad > > there might be some loose ends (interface change), but > I think this would solve our problems with fdarray > > I'll be able to get back to it by the end of the week, > but if you want to check/finish this patch earlier go ahead > > jirka > > > --- > tools/lib/perf/evlist.c | 134 +++++++++++++++++------ > tools/lib/perf/include/internal/evlist.h | 9 +- > tools/perf/builtin-kvm.c | 8 +- > tools/perf/builtin-record.c | 14 ++- > 4 files changed, 120 insertions(+), 45 deletions(-) > > diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c > index 6a875a0f01bb..8569cdd8bbd8 100644 > --- a/tools/lib/perf/evlist.c > +++ b/tools/lib/perf/evlist.c > @@ -23,6 +23,7 @@ > #include <perf/cpumap.h> > #include <perf/threadmap.h> > #include <api/fd/array.h> > +#include <sys/epoll.h> > > void perf_evlist__init(struct perf_evlist *evlist) > { > @@ -32,7 +33,10 @@ void perf_evlist__init(struct perf_evlist *evlist) > INIT_HLIST_HEAD(&evlist->heads[i]); > INIT_LIST_HEAD(&evlist->entries); > evlist->nr_entries = 0; > - fdarray__init(&evlist->pollfd, 64); > + INIT_LIST_HEAD(&evlist->poll_data); > + evlist->poll_cnt = 0; > + evlist->poll_act = 0; > + evlist->poll_fd = -1; > } > > static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, > @@ -120,6 +124,23 @@ static void perf_evlist__purge(struct perf_evlist > *evlist) > evlist->nr_entries = 0; > } > > +struct poll_data { > + int fd; > + void *ptr; > + struct list_head list; > +}; > + > +static void perf_evlist__exit_pollfd(struct perf_evlist *evlist) > +{ > + struct poll_data *data, *tmp; > + > + if (evlist->poll_fd != -1) > + close(evlist->poll_fd); > + > + list_for_each_entry_safe(data, tmp, &evlist->poll_data, list) > + free(data); > +} > + > void perf_evlist__exit(struct perf_evlist *evlist) > { > perf_cpu_map__put(evlist->cpus); > @@ -128,7 +149,7 @@ void perf_evlist__exit(struct perf_evlist *evlist) > evlist->cpus = NULL; > evlist->all_cpus = NULL; > evlist->threads = NULL; > - fdarray__exit(&evlist->pollfd); > + perf_evlist__exit_pollfd(evlist); > } > > void perf_evlist__delete(struct perf_evlist *evlist) > @@ -285,56 +306,105 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, > > int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) > { > - int nr_cpus = perf_cpu_map__nr(evlist->cpus); > - int nr_threads = perf_thread_map__nr(evlist->threads); > - int nfds = 0; > - struct perf_evsel *evsel; > - > - perf_evlist__for_each_entry(evlist, evsel) { > - if (evsel->system_wide) > - nfds += nr_cpus; > - else > - nfds += nr_cpus * nr_threads; > - } > + int poll_fd; > > - if (fdarray__available_entries(&evlist->pollfd) < nfds && > - fdarray__grow(&evlist->pollfd, nfds) < 0) > - return -ENOMEM; > + poll_fd = epoll_create1(EPOLL_CLOEXEC); > + if (!poll_fd) > + return -1; > > + evlist->poll_fd = poll_fd; > return 0; > } > > -int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, > - void *ptr, short revent) > +static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, > + struct poll_data *data, > + short revent) > { > - int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); > + struct epoll_event *events, ev = { > + .data.ptr = data, > + .events = revent | EPOLLERR | EPOLLHUP, > + }; > + int err; > + > + err = epoll_ctl(evlist->poll_fd, EPOLL_CTL_ADD, data->fd, &ev); > + if (err) > + return err; > > - if (pos >= 0) { > - evlist->pollfd.priv[pos].ptr = ptr; > - fcntl(fd, F_SETFL, O_NONBLOCK); > + events = realloc(evlist->poll_events, sizeof(ev) * evlist->poll_cnt); > + if (events) { > + evlist->poll_events = events; > + evlist->poll_cnt++; > } > > - return pos; > + return events ? 0 : -ENOMEM; > } > > -static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, > - void *arg __maybe_unused) > +int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, > + void *ptr, short revent) > { > - struct perf_mmap *map = fda->priv[fd].ptr; > + struct poll_data *data = zalloc(sizeof(*data)); > + int err; > > - if (map) > - perf_mmap__put(map); > + if (!data) > + return -ENOMEM; > + > + data->fd = fd; > + data->ptr = ptr; > + > + err = __perf_evlist__add_pollfd(evlist, data, revent); > + if (!err) > + list_add_tail(&data->list, &evlist->poll_data); > + > + return err; > } > > int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short > revents_and_mask) > { > - return fdarray__filter(&evlist->pollfd, revents_and_mask, > - perf_evlist__munmap_filtered, NULL); > + struct epoll_event *events = evlist->poll_events; > + int i, removed = 0; > + > + for (i = 0; i < evlist->poll_act; i++) { > + if (events[i].events & revents_and_mask) { > + struct poll_data *data = events[i].data.ptr; > + > + if (data->ptr) > + perf_mmap__put(data->ptr); > + > + epoll_ctl(evlist->poll_fd, EPOLL_CTL_DEL, data->fd, > &events[i]); > + > + list_del(&data->list); > + free(data); > + removed++; > + } > + } > + > + return evlist->poll_cnt -= removed; > +} > + > +bool perf_evlist__pollfd_data(struct perf_evlist *evlist, int fd) > +{ > + int i; > + > + if (evlist->poll_act < 0) > + return false; > + > + for (i = 0; i < evlist->poll_act; i++) { > + struct poll_data *data = evlist->poll_events[i].data.ptr; > + > + if (data->fd == fd) > + return true; > + } > + > + return false; > } > > int perf_evlist__poll(struct perf_evlist *evlist, int timeout) > { > - return fdarray__poll(&evlist->pollfd, timeout); > + evlist->poll_act = epoll_wait(evlist->poll_fd, > + evlist->poll_events, > + evlist->poll_cnt, > + timeout); > + return evlist->poll_act; > } > > static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, > bool overwrite) > @@ -593,7 +663,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, > return -ENOMEM; > } > > - if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) > < 0) > + if (evlist->poll_fd == -1 && perf_evlist__alloc_pollfd(evlist) < 0) > return -ENOMEM; > > if (perf_cpu_map__empty(cpus)) > diff --git a/tools/lib/perf/include/internal/evlist.h > b/tools/lib/perf/include/internal/evlist.h > index 74dc8c3f0b66..39b08a04b992 100644 > --- a/tools/lib/perf/include/internal/evlist.h > +++ b/tools/lib/perf/include/internal/evlist.h > @@ -3,7 +3,6 @@ > #define __LIBPERF_INTERNAL_EVLIST_H > > #include <linux/list.h> > -#include <api/fd/array.h> > #include <internal/evsel.h> > > #define PERF_EVLIST__HLIST_BITS 8 > @@ -12,6 +11,7 @@ > struct perf_cpu_map; > struct perf_thread_map; > struct perf_mmap_param; > +struct epoll_event; > > struct perf_evlist { > struct list_head entries; > @@ -22,7 +22,11 @@ struct perf_evlist { > struct perf_thread_map *threads; > int nr_mmaps; > size_t mmap_len; > - struct fdarray pollfd; > + int poll_fd; > + int poll_cnt; > + int poll_act; > + struct epoll_event *poll_events; > + struct list_head poll_data; > struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; > struct perf_mmap *mmap; > struct perf_mmap *mmap_ovw; > @@ -124,4 +128,5 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, > struct perf_evsel *evsel, > int cpu, int thread, int fd); > > +bool perf_evlist__pollfd_data(struct perf_evlist *evlist, int fd); > #endif /* __LIBPERF_INTERNAL_EVLIST_H */ > diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c > index 95a77058023e..decc75745395 100644 > --- a/tools/perf/builtin-kvm.c > +++ b/tools/perf/builtin-kvm.c > @@ -940,7 +940,7 @@ static int perf_kvm__handle_stdin(void) > > static int kvm_events_live_report(struct perf_kvm_stat *kvm) > { > - int nr_stdin, ret, err = -EINVAL; > + int ret, err = -EINVAL; > struct termios save; > > /* live flag must be set first */ > @@ -971,8 +971,7 @@ static int kvm_events_live_report(struct perf_kvm_stat > *kvm) > if (evlist__add_pollfd(kvm->evlist, kvm->timerfd) < 0) > goto out; > > - nr_stdin = evlist__add_pollfd(kvm->evlist, fileno(stdin)); > - if (nr_stdin < 0) > + if (evlist__add_pollfd(kvm->evlist, fileno(stdin))) > goto out; > > if (fd_set_nonblock(fileno(stdin)) != 0) > @@ -982,7 +981,6 @@ static int kvm_events_live_report(struct perf_kvm_stat > *kvm) > evlist__enable(kvm->evlist); > > while (!done) { > - struct fdarray *fda = &kvm->evlist->core.pollfd; > int rc; > > rc = perf_kvm__mmap_read(kvm); > @@ -993,7 +991,7 @@ static int kvm_events_live_report(struct perf_kvm_stat > *kvm) > if (err) > goto out; > > - if (fda->entries[nr_stdin].revents & POLLIN) > + if (perf_evlist__pollfd_data(&kvm->evlist->core, fileno(stdin))) > done = perf_kvm__handle_stdin(); > > if (!rc && !done) > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c > index e108d90ae2ed..a49bf4186aab 100644 > --- a/tools/perf/builtin-record.c > +++ b/tools/perf/builtin-record.c > @@ -1576,12 +1576,6 @@ static int __cmd_record(struct record *rec, int argc, > const char **argv) > status = -1; > goto out_delete_session; > } > - err = evlist__add_pollfd(rec->evlist, done_fd); > - if (err < 0) { > - pr_err("Failed to add wakeup eventfd to poll list\n"); > - status = err; > - goto out_delete_session; > - } > #endif // HAVE_EVENTFD_SUPPORT > > session->header.env.comp_type = PERF_COMP_ZSTD; > @@ -1624,6 +1618,14 @@ static int __cmd_record(struct record *rec, int argc, > const char **argv) > } > session->header.env.comp_mmap_len = session->evlist->core.mmap_len; > > +#ifdef HAVE_EVENTFD_SUPPORT > + err = evlist__add_pollfd(rec->evlist, done_fd); > + if (err < 0) { > + pr_err("Failed to add wakeup eventfd to poll list\n"); > + goto out_child; > + } > +#endif // HAVE_EVENTFD_SUPPORT > + > if (rec->opts.kcore) { > err = record__kcore_copy(&session->machines.host, data); > if (err) { > -- > 2.25.4 >