Trace the fence dependencies similarly to how we print fences:

 ... , dependencies:{(context:606, seqno:38006)}

This allows tools to analyze the dependencies between the jobs (previously
it was only possible for fences traced by drm_sched_job_wait_dep).

Since drm_sched_job and drm_run_job use the same base event class,
the caller of trace_drm_run_job have to pass a dep_count of 0 (which
is ignored because dependencies are only considered at submit time).

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-pra...@amd.com>
---
 .../gpu/drm/scheduler/gpu_scheduler_trace.h   | 59 ++++++++++++++++---
 drivers/gpu/drm/scheduler/sched_entity.c      |  8 ++-
 drivers/gpu/drm/scheduler/sched_main.c        |  2 +-
 3 files changed, 58 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h 
b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
index f6da377cdb99..0abcad26839c 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
@@ -26,15 +26,41 @@
 
 #include <linux/stringify.h>
 #include <linux/types.h>
+#include <linux/trace_seq.h>
 #include <linux/tracepoint.h>
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM gpu_scheduler
 #define TRACE_INCLUDE_FILE gpu_scheduler_trace
 
+#ifndef __TRACE_EVENT_GPU_SCHEDULER_PRINT_FN
+#define __TRACE_EVENT_GPU_SCHEDULER_PRINT_FN
+/* Similar to trace_print_array_seq but for fences. */
+static inline const char *__print_dma_fence_array(struct trace_seq *p, const 
void *buf, int count)
+{
+       const char *ret = trace_seq_buffer_ptr(p);
+       u64 *fences = (u64 *) buf;
+       const char *prefix = "";
+
+       trace_seq_putc(p, '{');
+       for (int i = 0; i < count; i++) {
+               u64 context = fences[2 * i], seqno = fences[2 * i + 1];
+
+               trace_seq_printf(p, "%s(context:%llu, seqno:%lld)",
+                                prefix, context, seqno);
+               prefix = ",";
+       }
+       trace_seq_putc(p, '}');
+       trace_seq_putc(p, 0);
+
+       return ret;
+}
+#endif
+
 DECLARE_EVENT_CLASS(drm_sched_job,
-           TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity 
*entity),
-           TP_ARGS(sched_job, entity),
+           TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity 
*entity,
+                    unsigned int dep_count),
+           TP_ARGS(sched_job, entity, dep_count),
            TP_STRUCT__entry(
                             __string(name, sched_job->sched->name)
                             __field(uint64_t, id)
@@ -43,9 +69,14 @@ DECLARE_EVENT_CLASS(drm_sched_job,
                             __string(dev, dev_name(sched_job->sched->dev))
                             __field(uint64_t, fence_context)
                             __field(uint64_t, fence_seqno)
+                            __field(int, n_deps)
+                            __dynamic_array(u64, deps, dep_count * 2)
                             ),
 
            TP_fast_assign(
+                          unsigned long idx;
+                          struct dma_fence *fence;
+                          u64 *dyn_arr;
                           __entry->id = sched_job->id;
                           __assign_str(name);
                           __entry->job_count = 
spsc_queue_count(&entity->job_queue);
@@ -54,22 +85,32 @@ DECLARE_EVENT_CLASS(drm_sched_job,
                           __assign_str(dev);
                           __entry->fence_context = 
sched_job->s_fence->finished.context;
                           __entry->fence_seqno = 
sched_job->s_fence->finished.seqno;
-
+                          __entry->n_deps = dep_count;
+                          if (dep_count) {
+                               dyn_arr = __get_dynamic_array(deps);
+                               xa_for_each(&sched_job->dependencies, idx, 
fence) {
+                                       dyn_arr[2 * idx] = fence->context;
+                                       dyn_arr[2 * idx + 1] = fence->seqno;
+                               }
+                          }
                           ),
-           TP_printk("id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job 
count:%u, hw job count:%d",
+           TP_printk("id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job 
count:%u, hw job count:%d, dependencies:%s",
                      __entry->id,
                      __entry->fence_context, __entry->fence_seqno, 
__get_str(name),
-                     __entry->job_count, __entry->hw_job_count)
+                     __entry->job_count, __entry->hw_job_count,
+                     __print_dma_fence_array(p, __get_dynamic_array(deps), 
__entry->n_deps))
 );
 
 DEFINE_EVENT(drm_sched_job, drm_sched_job,
-           TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity 
*entity),
-           TP_ARGS(sched_job, entity)
+           TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity 
*entity,
+                    unsigned int dep_count),
+           TP_ARGS(sched_job, entity, dep_count)
 );
 
 DEFINE_EVENT_PRINT(drm_sched_job, drm_run_job,
-           TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity 
*entity),
-           TP_ARGS(sched_job, entity),
+           TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity 
*entity,
+                    unsigned int dep_count),
+           TP_ARGS(sched_job, entity, 0),
            TP_printk("dev=%s, id=%llu, fence=(context:%llu, seqno:%lld), 
ring=%s, job count:%u, hw job count:%d",
                      __get_str(dev), __entry->id,
                      __entry->fence_context, __entry->fence_seqno, 
__get_str(name),
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
b/drivers/gpu/drm/scheduler/sched_entity.c
index 58c8161289fe..aa33a9d29b14 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -583,7 +583,13 @@ void drm_sched_entity_push_job(struct drm_sched_job 
*sched_job)
        bool first;
        ktime_t submit_ts;
 
-       trace_drm_sched_job(sched_job, entity);
+       if (trace_drm_sched_job_enabled()) {
+               unsigned long index;
+               void *f;
+
+               xa_for_each(&sched_job->dependencies, index, f) { }
+               trace_drm_sched_job(sched_job, entity, index);
+       }
        atomic_inc(entity->rq->sched->score);
        WRITE_ONCE(entity->last_user, current->group_leader);
 
diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 7e90c9f95611..0bc9b7b24b34 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1201,7 +1201,7 @@ static void drm_sched_run_job_work(struct work_struct *w)
        atomic_add(sched_job->credits, &sched->credit_count);
        drm_sched_job_begin(sched_job);
 
-       trace_drm_run_job(sched_job, entity);
+       trace_drm_run_job(sched_job, entity, 0);
        fence = sched->ops->run_job(sched_job);
        complete_all(&entity->entity_idle);
        drm_sched_fence_scheduled(s_fence, fence);
-- 
2.40.1

Reply via email to