On 08/07, Maíra Canal wrote:
> This patch exposes the accumulated amount of active time per client
> through the fdinfo infrastructure. The amount of active time is exposed
> for each V3D queue: BIN, RENDER, CSD, TFU and CACHE_CLEAN.
> 
> In order to calculate the amount of active time per client, a CPU clock
> is used through the function local_clock(). The time at which a job
> starts is recorded and is later compared with the time at which the job
> finishes.
> 
> Moreover, the number of jobs submitted to each queue is also exposed on
> fdinfo through the identifier "v3d-jobs-<queue>".
> 
> Co-developed-by: Jose Maria Casanova Crespo <jmcasan...@igalia.com>
> Signed-off-by: Jose Maria Casanova Crespo <jmcasan...@igalia.com>
> Signed-off-by: Maíra Canal <mca...@igalia.com>
> ---
>  drivers/gpu/drm/v3d/v3d_drv.c   | 30 +++++++++++++++++++++++++++++-
>  drivers/gpu/drm/v3d/v3d_drv.h   | 23 +++++++++++++++++++++++
>  drivers/gpu/drm/v3d/v3d_gem.c   |  1 +
>  drivers/gpu/drm/v3d/v3d_irq.c   | 17 +++++++++++++++++
>  drivers/gpu/drm/v3d/v3d_sched.c | 24 ++++++++++++++++++++++++
>  5 files changed, 94 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
> index ffbbe9d527d3..ca65c707da03 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.c
> +++ b/drivers/gpu/drm/v3d/v3d_drv.c
> @@ -19,6 +19,7 @@
>  #include <linux/module.h>
>  #include <linux/of_platform.h>
>  #include <linux/platform_device.h>
> +#include <linux/sched/clock.h>
>  #include <linux/reset.h>
>  
>  #include <drm/drm_drv.h>
> @@ -111,6 +112,10 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
>       v3d_priv->v3d = v3d;
>  
>       for (i = 0; i < V3D_MAX_QUEUES; i++) {
> +             v3d_priv->enabled_ns[i] = 0;
> +             v3d_priv->start_ns[i] = 0;
> +             v3d_priv->jobs_sent[i] = 0;
> +
>               sched = &v3d->queue[i].sched;
>               drm_sched_entity_init(&v3d_priv->sched_entity[i],
>                                     DRM_SCHED_PRIORITY_NORMAL, &sched,
> @@ -136,7 +141,29 @@ v3d_postclose(struct drm_device *dev, struct drm_file 
> *file)
>       kfree(v3d_priv);
>  }
>  
> -DEFINE_DRM_GEM_FOPS(v3d_drm_fops);
> +static void v3d_show_fdinfo(struct drm_printer *p, struct drm_file *file)
> +{
> +     struct v3d_file_priv *file_priv = file->driver_priv;
> +     u64 timestamp = local_clock();
> +     enum v3d_queue queue;
> +
> +     for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
> +             drm_printf(p, "drm-engine-%s: \t%llu ns\n",
> +                        v3d_queue_to_string(queue),
> +                        file_priv->start_ns[queue] ? 
> file_priv->enabled_ns[queue]
> +                                                   + timestamp - 
> file_priv->start_ns[queue]
> +                                                   : 
> file_priv->enabled_ns[queue]);
> +
> +             drm_printf(p, "v3d-jobs-%s: \t%llu jobs\n",
> +                        v3d_queue_to_string(queue), 
> file_priv->jobs_sent[queue]);
> +     }
> +}
> +
> +static const struct file_operations v3d_drm_fops = {
> +     .owner = THIS_MODULE,
> +     DRM_GEM_FOPS,
> +     .show_fdinfo = drm_show_fdinfo,
> +};

I'm not sure where the best place would be, but could you document the
expected counting behavior in case of a GPU reset?

>  
>  /* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP
>   * protection between clients.  Note that render nodes would be
> @@ -176,6 +203,7 @@ static const struct drm_driver v3d_drm_driver = {
>       .ioctls = v3d_drm_ioctls,
>       .num_ioctls = ARRAY_SIZE(v3d_drm_ioctls),
>       .fops = &v3d_drm_fops,
> +     .show_fdinfo = v3d_show_fdinfo,
>  
>       .name = DRIVER_NAME,
>       .desc = DRIVER_DESC,
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
> index 7f664a4b2a75..7f2897e5b2cb 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.h
> +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> @@ -21,6 +21,18 @@ struct reset_control;
>  
>  #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
>  
> +static inline char *v3d_queue_to_string(enum v3d_queue queue)
> +{
> +     switch (queue) {
> +     case V3D_BIN: return "bin";
> +     case V3D_RENDER: return "render";
> +     case V3D_TFU: return "tfu";
> +     case V3D_CSD: return "csd";
> +     case V3D_CACHE_CLEAN: return "cache_clean";
> +     }
> +     return "UNKNOWN";
> +}
> +
>  struct v3d_queue_state {
>       struct drm_gpu_scheduler sched;
>  
> @@ -167,6 +179,12 @@ struct v3d_file_priv {
>       } perfmon;
>  
>       struct drm_sched_entity sched_entity[V3D_MAX_QUEUES];
> +
> +     u64 start_ns[V3D_MAX_QUEUES];
> +
> +     u64 enabled_ns[V3D_MAX_QUEUES];
> +
> +     u64 jobs_sent[V3D_MAX_QUEUES];
>  };
>  
>  struct v3d_bo {
> @@ -238,6 +256,11 @@ struct v3d_job {
>        */
>       struct v3d_perfmon *perfmon;
>  
> +     /* File descriptor of the process that submitted the job that could be 
> used
> +      * for collecting stats by process of GPU usage.
> +      */
> +     struct drm_file *file;
> +
>       /* Callback for the freeing of the job on refcount going to 0. */
>       void (*free)(struct kref *ref);
>  };
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index 2e94ce788c71..40ed0c7c3fad 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -415,6 +415,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file 
> *file_priv,
>       job = *container;
>       job->v3d = v3d;
>       job->free = free;
> +     job->file = file_priv;
>  
>       ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
>                                v3d_priv);
> diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
> index e714d5318f30..c898800ae9c2 100644
> --- a/drivers/gpu/drm/v3d/v3d_irq.c
> +++ b/drivers/gpu/drm/v3d/v3d_irq.c
> @@ -14,6 +14,7 @@
>   */
>  
>  #include <linux/platform_device.h>
> +#include <linux/sched/clock.h>
>  
>  #include "v3d_drv.h"
>  #include "v3d_regs.h"
> @@ -100,6 +101,10 @@ v3d_irq(int irq, void *arg)
>       if (intsts & V3D_INT_FLDONE) {
>               struct v3d_fence *fence =
>                       to_v3d_fence(v3d->bin_job->base.irq_fence);
> +             struct v3d_file_priv *file = 
> v3d->bin_job->base.file->driver_priv;
> +
> +             file->enabled_ns[V3D_BIN] += local_clock() - 
> file->start_ns[V3D_BIN];
> +             file->start_ns[V3D_BIN] = 0;
>  
>               trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
>               dma_fence_signal(&fence->base);
> @@ -109,6 +114,10 @@ v3d_irq(int irq, void *arg)
>       if (intsts & V3D_INT_FRDONE) {
>               struct v3d_fence *fence =
>                       to_v3d_fence(v3d->render_job->base.irq_fence);
> +             struct v3d_file_priv *file = 
> v3d->render_job->base.file->driver_priv;
> +
> +             file->enabled_ns[V3D_RENDER] += local_clock() - 
> file->start_ns[V3D_RENDER];
> +             file->start_ns[V3D_RENDER] = 0;
>  
>               trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
>               dma_fence_signal(&fence->base);
> @@ -118,6 +127,10 @@ v3d_irq(int irq, void *arg)
>       if (intsts & V3D_INT_CSDDONE) {
>               struct v3d_fence *fence =
>                       to_v3d_fence(v3d->csd_job->base.irq_fence);
> +             struct v3d_file_priv *file = 
> v3d->csd_job->base.file->driver_priv;
> +
> +             file->enabled_ns[V3D_CSD] += local_clock() - 
> file->start_ns[V3D_CSD];
> +             file->start_ns[V3D_CSD] = 0;
>  
>               trace_v3d_csd_irq(&v3d->drm, fence->seqno);
>               dma_fence_signal(&fence->base);
> @@ -154,6 +167,10 @@ v3d_hub_irq(int irq, void *arg)
>       if (intsts & V3D_HUB_INT_TFUC) {
>               struct v3d_fence *fence =
>                       to_v3d_fence(v3d->tfu_job->base.irq_fence);
> +             struct v3d_file_priv *file = 
> v3d->tfu_job->base.file->driver_priv;
> +
> +             file->enabled_ns[V3D_TFU] += local_clock() - 
> file->start_ns[V3D_TFU];
> +             file->start_ns[V3D_TFU] = 0;
>  
>               trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
>               dma_fence_signal(&fence->base);
> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
> index 06238e6d7f5c..b360709c0765 100644
> --- a/drivers/gpu/drm/v3d/v3d_sched.c
> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
> @@ -18,6 +18,7 @@
>   * semaphores to interlock between them.
>   */
>  
> +#include <linux/sched/clock.h>
>  #include <linux/kthread.h>
>  
>  #include "v3d_drv.h"
> @@ -76,6 +77,7 @@ static struct dma_fence *v3d_bin_job_run(struct 
> drm_sched_job *sched_job)
>  {
>       struct v3d_bin_job *job = to_bin_job(sched_job);
>       struct v3d_dev *v3d = job->base.v3d;
> +     struct v3d_file_priv *file = job->base.file->driver_priv;
>       struct drm_device *dev = &v3d->drm;
>       struct dma_fence *fence;
>       unsigned long irqflags;
> @@ -107,6 +109,9 @@ static struct dma_fence *v3d_bin_job_run(struct 
> drm_sched_job *sched_job)
>       trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
>                           job->start, job->end);
>  
> +     file->start_ns[V3D_BIN] = local_clock();
> +     file->jobs_sent[V3D_BIN]++;
> +
>       v3d_switch_perfmon(v3d, &job->base);
>  
>       /* Set the current and end address of the control list.
> @@ -131,6 +136,7 @@ static struct dma_fence *v3d_render_job_run(struct 
> drm_sched_job *sched_job)
>  {
>       struct v3d_render_job *job = to_render_job(sched_job);
>       struct v3d_dev *v3d = job->base.v3d;
> +     struct v3d_file_priv *file = job->base.file->driver_priv;
>       struct drm_device *dev = &v3d->drm;
>       struct dma_fence *fence;
>  
> @@ -158,6 +164,9 @@ static struct dma_fence *v3d_render_job_run(struct 
> drm_sched_job *sched_job)
>       trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
>                           job->start, job->end);
>  
> +     file->start_ns[V3D_RENDER] = local_clock();
> +     file->jobs_sent[V3D_RENDER]++;
> +
>       v3d_switch_perfmon(v3d, &job->base);
>  
>       /* XXX: Set the QCFG */
> @@ -176,6 +185,7 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
>  {
>       struct v3d_tfu_job *job = to_tfu_job(sched_job);
>       struct v3d_dev *v3d = job->base.v3d;
> +     struct v3d_file_priv *file = job->base.file->driver_priv;
>       struct drm_device *dev = &v3d->drm;
>       struct dma_fence *fence;
>  
> @@ -190,6 +200,9 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
>  
>       trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
>  
> +     file->start_ns[V3D_TFU] = local_clock();
> +     file->jobs_sent[V3D_TFU]++;
> +
>       V3D_WRITE(V3D_TFU_IIA, job->args.iia);
>       V3D_WRITE(V3D_TFU_IIS, job->args.iis);
>       V3D_WRITE(V3D_TFU_ICA, job->args.ica);
> @@ -213,6 +226,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
>  {
>       struct v3d_csd_job *job = to_csd_job(sched_job);
>       struct v3d_dev *v3d = job->base.v3d;
> +     struct v3d_file_priv *file = job->base.file->driver_priv;
>       struct drm_device *dev = &v3d->drm;
>       struct dma_fence *fence;
>       int i;
> @@ -231,6 +245,9 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
>  
>       trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
>  
> +     file->start_ns[V3D_CSD] = local_clock();
> +     file->jobs_sent[V3D_CSD]++;
> +
>       v3d_switch_perfmon(v3d, &job->base);
>  
>       for (i = 1; i <= 6; i++)
> @@ -246,9 +263,16 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
>  {
>       struct v3d_job *job = to_v3d_job(sched_job);
>       struct v3d_dev *v3d = job->v3d;
> +     struct v3d_file_priv *file = job->file->driver_priv;
> +
> +     file->start_ns[V3D_CACHE_CLEAN] = local_clock();
> +     file->jobs_sent[V3D_CACHE_CLEAN]++;
>  
>       v3d_clean_caches(v3d);
>  
> +     file->enabled_ns[V3D_CACHE_CLEAN] += local_clock() - 
> file->start_ns[V3D_CACHE_CLEAN];
> +     file->start_ns[V3D_CACHE_CLEAN] = 0;
> +
>       return NULL;
>  }
>  
> -- 
> 2.41.0
> 

Attachment: signature.asc
Description: PGP signature

Reply via email to