Hi Steve,

On Tue, Apr 19, 2016 at 10:34:23AM -0400, Steven Rostedt wrote:
> From: Steven Rostedt <rost...@goodmis.org>
> 
> In order to add the ability to let tasks that are filtered by the events
> have their children also be traced on fork (and then not traced on exit),
> convert the array into a pid bitmask. Most of the time the number of pids is
> only 32768 pids or a 4k bitmask, which is the same size as the default list
> currently is, and that list could grow if more pids are listed.
> 
> This also greatly simplifies the code.
> 
> Suggested-by: "H. Peter Anvin" <h...@zytor.com>
> Signed-off-by: Steven Rostedt <rost...@goodmis.org>
> ---

[SNIP]
> @@ -1571,7 +1572,7 @@ ftrace_event_pid_write(struct file *filp, const char 
> __user *ubuf,
>       struct seq_file *m = filp->private_data;
>       struct trace_array *tr = m->private;
>       struct trace_pid_list *filtered_pids = NULL;
> -     struct trace_pid_list *pid_list = NULL;
> +     struct trace_pid_list *pid_list;
>       struct trace_event_file *file;
>       struct trace_parser parser;
>       unsigned long val;
> @@ -1579,7 +1580,7 @@ ftrace_event_pid_write(struct file *filp, const char 
> __user *ubuf,
>       ssize_t read = 0;
>       ssize_t ret = 0;
>       pid_t pid;
> -     int i;
> +     int nr_pids = 0;
>  
>       if (!cnt)
>               return 0;
> @@ -1592,10 +1593,43 @@ ftrace_event_pid_write(struct file *filp, const char 
> __user *ubuf,
>               return -ENOMEM;
>  
>       mutex_lock(&event_mutex);
> +     filtered_pids = rcu_dereference_protected(tr->filtered_pids,
> +                                          lockdep_is_held(&event_mutex));
> +
>       /*
> -      * Load as many pids into the array before doing a
> -      * swap from the tr->filtered_pids to the new list.
> +      * Always recreate a new array. The write is an all or nothing
> +      * operation. Always create a new array when adding new pids by
> +      * the user. If the operation fails, then the current list is
> +      * not modified.
>        */
> +     pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
> +     if (!pid_list) {
> +             read = -ENOMEM;
> +             goto out;
> +     }
> +     pid_list->pid_max = READ_ONCE(pid_max);
> +     /* Only truncating will shrink pid_max */
> +     if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
> +             pid_list->pid_max = filtered_pids->pid_max;
> +     pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
> +     if (!pid_list->pids) {
> +             kfree(pid_list);
> +             read = -ENOMEM;
> +             goto out;
> +     }
> +     if (filtered_pids) {
> +             /* copy the current bits to the new max */
> +             pid = find_first_bit(filtered_pids->pids,
> +                                  filtered_pids->pid_max);
> +             while (pid < filtered_pids->pid_max) {
> +                     set_bit(pid, pid_list->pids);
> +                     pid = find_next_bit(filtered_pids->pids,
> +                                         filtered_pids->pid_max,
> +                                         pid + 1);
> +                     nr_pids++;
> +             }

Why not just use memcpy and keep nr_pids in the pid_list?

Thanks,
Namhyung



> +     }
> +
>       while (cnt > 0) {
>  
>               this_pos = 0;
> @@ -1613,92 +1647,35 @@ ftrace_event_pid_write(struct file *filp, const char 
> __user *ubuf,
>               ret = -EINVAL;
>               if (kstrtoul(parser.buffer, 0, &val))
>                       break;
> -             if (val > INT_MAX)
> +             if (val >= pid_list->pid_max)
>                       break;
>  
>               pid = (pid_t)val;
>  
> -             ret = -ENOMEM;
> -             if (!pid_list) {
> -                     pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
> -                     if (!pid_list)
> -                             break;
> -
> -                     filtered_pids = 
> rcu_dereference_protected(tr->filtered_pids,
> -                                                     
> lockdep_is_held(&event_mutex));
> -                     if (filtered_pids)
> -                             pid_list->order = filtered_pids->order;
> -                     else
> -                             pid_list->order = 0;
> -
> -                     pid_list->pids = (void *)__get_free_pages(GFP_KERNEL,
> -                                                               
> pid_list->order);
> -                     if (!pid_list->pids)
> -                             break;
> -
> -                     if (filtered_pids) {
> -                             pid_list->nr_pids = filtered_pids->nr_pids;
> -                             memcpy(pid_list->pids, filtered_pids->pids,
> -                                    pid_list->nr_pids * sizeof(pid_t));
> -                     } else
> -                             pid_list->nr_pids = 0;
> -             }
> -
> -             if (pid_list->nr_pids >= max_pids(pid_list)) {
> -                     pid_t *pid_page;
> -
> -                     pid_page = (void *)__get_free_pages(GFP_KERNEL,
> -                                                         pid_list->order + 
> 1);
> -                     if (!pid_page)
> -                             break;
> -                     memcpy(pid_page, pid_list->pids,
> -                            pid_list->nr_pids * sizeof(pid_t));
> -                     free_pages((unsigned long)pid_list->pids, 
> pid_list->order);
> -
> -                     pid_list->order++;
> -                     pid_list->pids = pid_page;
> -             }
> +             set_bit(pid, pid_list->pids);
> +             nr_pids++;
>  
> -             pid_list->pids[pid_list->nr_pids++] = pid;
>               trace_parser_clear(&parser);
>               ret = 0;
>       }
>       trace_parser_put(&parser);
>  
>       if (ret < 0) {
> -             if (pid_list)
> -                     free_pages((unsigned long)pid_list->pids, 
> pid_list->order);
> +             vfree(pid_list->pids);
>               kfree(pid_list);
> -             mutex_unlock(&event_mutex);
> -             return ret;
> -     }
> -
> -     if (!pid_list) {
> -             mutex_unlock(&event_mutex);
> -             return ret;
> +             read = ret;
> +             goto out;
>       }
>  
> -     sort(pid_list->pids, pid_list->nr_pids, sizeof(pid_t), cmp_pid, NULL);
> -
> -     /* Remove duplicates */
> -     for (i = 1; i < pid_list->nr_pids; i++) {
> -             int start = i;
> -
> -             while (i < pid_list->nr_pids &&
> -                    pid_list->pids[i - 1] == pid_list->pids[i])
> -                     i++;
> -
> -             if (start != i) {
> -                     if (i < pid_list->nr_pids) {
> -                             memmove(&pid_list->pids[start], 
> &pid_list->pids[i],
> -                                     (pid_list->nr_pids - i) * 
> sizeof(pid_t));
> -                             pid_list->nr_pids -= i - start;
> -                             i = start;
> -                     } else
> -                             pid_list->nr_pids = start;
> -             }
> +     if (!nr_pids) {
> +             /* Cleared the list of pids */
> +             vfree(pid_list->pids);
> +             kfree(pid_list);
> +             read = ret;
> +             if (!filtered_pids)
> +                     goto out;
> +             pid_list = NULL;
>       }
> -
>       rcu_assign_pointer(tr->filtered_pids, pid_list);
>  
>       list_for_each_entry(file, &tr->events, list) {
> @@ -1708,7 +1685,7 @@ ftrace_event_pid_write(struct file *filp, const char 
> __user *ubuf,
>       if (filtered_pids) {
>               synchronize_sched();
>  
> -             free_pages((unsigned long)filtered_pids->pids, 
> filtered_pids->order);
> +             vfree(filtered_pids->pids);
>               kfree(filtered_pids);
>       } else {
>               /*
> @@ -1745,10 +1722,12 @@ ftrace_event_pid_write(struct file *filp, const char 
> __user *ubuf,
>        */
>       on_each_cpu(ignore_task_cpu, tr, 1);
>  
> + out:
>       mutex_unlock(&event_mutex);
>  
>       ret = read;
> -     *ppos += read;
> +     if (read > 0)
> +             *ppos += read;
>  
>       return ret;
>  }
> -- 
> 2.8.0.rc3
> 
> 

Reply via email to