Hello Joonsoo,

On 30/12/14 07:38, Joonsoo Kim wrote:
> On Fri, Dec 26, 2014 at 05:39:03PM +0300, Stefan I. Strogin wrote:
>> /proc/cmainfo contains a list of currently allocated CMA buffers for every
>> CMA area when CONFIG_CMA_DEBUG is enabled.
> Hello,
>
> I think that providing these information looks useful, but, we need better
> implementation. As Laura said, it is better to use debugfs. And,
> instead of re-implementing the wheel, how about using tracepoint
> to print these information? See below comments.

Excuse me for a long delay. I've tried to give a detailed answer here:
https://lkml.org/lkml/2015/1/21/362
Do you mean by «the re-implemented wheel» seq_print_stack_trace()? If so
then it was thought to show an owner of each allocated buffer. I used a
similar way as in page_owner: saving stack_trace for each allocation. Do
you think we can use tracepoints instead?


>
>> Format is:
>>
>> <base_phys_addr> - <end_phys_addr> (<size> kB), allocated by <PID>\
>>              (<command name>), latency <allocation latency> us
>>  <stack backtrace when the buffer had been allocated>
>>
>> Signed-off-by: Stefan I. Strogin <s.stro...@partner.samsung.com>
>> ---
>>  mm/cma.c | 202 
>> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 202 insertions(+)
>>
>> diff --git a/mm/cma.c b/mm/cma.c
>> index a85ae28..ffaea26 100644
>> --- a/mm/cma.c
>> +++ b/mm/cma.c
>> @@ -34,6 +34,10 @@
>>  #include <linux/cma.h>
>>  #include <linux/highmem.h>
>>  #include <linux/io.h>
>> +#include <linux/list.h>
>> +#include <linux/proc_fs.h>
>> +#include <linux/uaccess.h>
>> +#include <linux/time.h>
>>  
>>  struct cma {
>>      unsigned long   base_pfn;
>> @@ -41,8 +45,25 @@ struct cma {
>>      unsigned long   *bitmap;
>>      unsigned int order_per_bit; /* Order of pages represented by one bit */
>>      struct mutex    lock;
>> +#ifdef CONFIG_CMA_DEBUG
>> +    struct list_head buffers_list;
>> +    struct mutex    list_lock;
>> +#endif
>>  };
>>  
>> +#ifdef CONFIG_CMA_DEBUG
>> +struct cma_buffer {
>> +    unsigned long pfn;
>> +    unsigned long count;
>> +    pid_t pid;
>> +    char comm[TASK_COMM_LEN];
>> +    unsigned int latency;
>> +    unsigned long trace_entries[16];
>> +    unsigned int nr_entries;
>> +    struct list_head list;
>> +};
>> +#endif
>> +
>>  static struct cma cma_areas[MAX_CMA_AREAS];
>>  static unsigned cma_area_count;
>>  static DEFINE_MUTEX(cma_mutex);
>> @@ -132,6 +153,10 @@ static int __init cma_activate_area(struct cma *cma)
>>      } while (--i);
>>  
>>      mutex_init(&cma->lock);
>> +#ifdef CONFIG_CMA_DEBUG
>> +    INIT_LIST_HEAD(&cma->buffers_list);
>> +    mutex_init(&cma->list_lock);
>> +#endif
>>      return 0;
>>  
>>  err:
>> @@ -347,6 +372,86 @@ err:
>>      return ret;
>>  }
>>  
>> +#ifdef CONFIG_CMA_DEBUG
>> +/**
>> + * cma_buffer_list_add() - add a new entry to a list of allocated buffers
>> + * @cma:     Contiguous memory region for which the allocation is performed.
>> + * @pfn:     Base PFN of the allocated buffer.
>> + * @count:   Number of allocated pages.
>> + * @latency: Nanoseconds spent to allocate the buffer.
>> + *
>> + * This function adds a new entry to the list of allocated contiguous memory
>> + * buffers in a CMA area. It uses the CMA area specificated by the device
>> + * if available or the default global one otherwise.
>> + */
>> +static int cma_buffer_list_add(struct cma *cma, unsigned long pfn,
>> +                           int count, s64 latency)
>> +{
>> +    struct cma_buffer *cmabuf;
>> +    struct stack_trace trace;
>> +
>> +    cmabuf = kmalloc(sizeof(struct cma_buffer), GFP_KERNEL);
>> +    if (!cmabuf)
>> +            return -ENOMEM;
>> +
>> +    trace.nr_entries = 0;
>> +    trace.max_entries = ARRAY_SIZE(cmabuf->trace_entries);
>> +    trace.entries = &cmabuf->trace_entries[0];
>> +    trace.skip = 2;
>> +    save_stack_trace(&trace);
>> +
>> +    cmabuf->pfn = pfn;
>> +    cmabuf->count = count;
>> +    cmabuf->pid = task_pid_nr(current);
>> +    cmabuf->nr_entries = trace.nr_entries;
>> +    get_task_comm(cmabuf->comm, current);
>> +    cmabuf->latency = (unsigned int) div_s64(latency, NSEC_PER_USEC);
>> +
>> +    mutex_lock(&cma->list_lock);
>> +    list_add_tail(&cmabuf->list, &cma->buffers_list);
>> +    mutex_unlock(&cma->list_lock);
>> +
>> +    return 0;
>> +}
>> +
>> +/**
>> + * cma_buffer_list_del() - delete an entry from a list of allocated buffers
>> + * @cma:   Contiguous memory region for which the allocation was performed.
>> + * @pfn:   Base PFN of the released buffer.
>> + *
>> + * This function deletes a list entry added by cma_buffer_list_add().
>> + */
>> +static void cma_buffer_list_del(struct cma *cma, unsigned long pfn)
>> +{
>> +    struct cma_buffer *cmabuf;
>> +
>> +    mutex_lock(&cma->list_lock);
>> +
>> +    list_for_each_entry(cmabuf, &cma->buffers_list, list)
>> +            if (cmabuf->pfn == pfn) {
>> +                    list_del(&cmabuf->list);
>> +                    kfree(cmabuf);
>> +                    goto out;
>> +            }
>> +
> Is there more elegant way to find buffer? This linear search overhead
> would change system behaviour if there are lots of buffers.
>
>> +    pr_err("%s(pfn %lu): couldn't find buffers list entry\n",
>> +           __func__, pfn);
>> +
>> +out:
>> +    mutex_unlock(&cma->list_lock);
>> +}
>> +#else
>> +static int cma_buffer_list_add(struct cma *cma, unsigned long pfn,
>> +                           int count, s64 latency)
>> +{
>> +    return 0;
>> +}
>> +
>> +static void cma_buffer_list_del(struct cma *cma, unsigned long pfn)
>> +{
>> +}
>> +#endif /* CONFIG_CMA_DEBUG */
>> +
>>  /**
>>   * cma_alloc() - allocate pages from contiguous area
>>   * @cma:   Contiguous memory region for which the allocation is performed.
>> @@ -361,11 +466,15 @@ struct page *cma_alloc(struct cma *cma, int count, 
>> unsigned int align)
>>      unsigned long mask, offset, pfn, start = 0;
>>      unsigned long bitmap_maxno, bitmap_no, bitmap_count;
>>      struct page *page = NULL;
>> +    struct timespec ts1, ts2;
>> +    s64 latency;
>>      int ret;
>>  
>>      if (!cma || !cma->count)
>>              return NULL;
>>  
>> +    getnstimeofday(&ts1);
>> +
>>      pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
>>               count, align);
>>  
>> @@ -413,6 +522,19 @@ struct page *cma_alloc(struct cma *cma, int count, 
>> unsigned int align)
>>              start = bitmap_no + mask + 1;
>>      }
>>  
>> +    getnstimeofday(&ts2);
>> +    latency = timespec_to_ns(&ts2) - timespec_to_ns(&ts1);
>> +
>> +    if (page) {
>> +            ret = cma_buffer_list_add(cma, pfn, count, latency);
>> +            if (ret) {
>> +                    pr_warn("%s(): cma_buffer_list_add() returned %d\n",
>> +                            __func__, ret);
>> +                    cma_release(cma, page, count);
>> +                    page = NULL;
>> +            }
> So, we would fail to allocate CMA memory if we can't allocate buffer
> for debugging. I don't think it makes sense. With tracepoint,
> we don't need to allocate buffer in runtime.
>
> Thanks.
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majord...@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"d...@kvack.org";> em...@kvack.org </a>
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to