A fresh round of offline design discussions with internal team has decided 
that:

 - we don't want to use an interim circular buffer to copy all of the GuC
   generated register dumps of one or more 'engine-capture-groups'.
 - instead, we shall dynamically allocate multiple nodes, each node being
   a single "engine-capture-dump". A linked list of one or many engine-
   capture-dumps would result from a single engine-capture-group.
 - this dynamic allocation should happen during the G2H error-capture
   notification event which happens before the corresponding G2H context-
   reset that triggers the i915_gpu_coredump (where we want to avoid
   memory allocation moving forward).
 - we also realize that during the linked-list allocation we would need
   a first parse of the guc-log-error-state-capture head-to-tail entries
   in order to duplicate global and engine-class register dumps for
   each engine-instance register dump if we find dependent-engine resets
   in an engine-capture-group.
 - later when i915_gpu_coredump calls into capture_engine, we finally
   attach the corresponding node from the linked list above (detaching it
   from that list) into i915_gpu_coredump's intel_engine_coredump
   structure when we have a matching LRCA/guc-id/engine-instance.
 - we would also have to add a flag, plumbed from the i915_gpu_coredump
   top-level trigger through to capture_engine, to indicate whether the
   capture was triggered via a GuC context reset or a forced user reset
   or gt-reset. In the latter case (user/gt reset), we should capture the
   register values directly from the HW, i.e. the pre-GuC behavior,
   without matching against GuC.

...alan


On Tue, 2022-01-18 at 02:03 -0800, Alan Previn wrote:

> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> index b637628905ec..fc80c5f31915 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c+static void 
> __guc_capture_store_snapshot_work(struct intel_guc *guc)
..
..
> +{
> +     struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
> +     unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, 
> full_count;
> +     struct guc_log_buffer_state *log_buf_state;
> +     struct guc_log_buffer_state log_buf_state_local;
> +     void *src_data, *dst_data = NULL;
> +     bool new_overflow;
> +
> +     /* Lock to get the pointer to GuC capture-log-buffer-state */
> +     mutex_lock(&guc->log_state[GUC_CAPTURE_LOG_BUFFER].lock);
> +     log_buf_state = guc->log.buf_addr +
> +                     (sizeof(struct guc_log_buffer_state) * 
> GUC_CAPTURE_LOG_BUFFER);
> +     src_data = guc->log.buf_addr + 
> intel_guc_get_log_buffer_offset(GUC_CAPTURE_LOG_BUFFER);
> +
> +     /*
> +      * Make a copy of the state structure, inside GuC log buffer
> +      * (which is uncached mapped), on the stack to avoid reading
> +      * from it multiple times.
> +      */
> +     memcpy(&log_buf_state_local, log_buf_state, sizeof(struct 
> guc_log_buffer_state));
> +     buffer_size = intel_guc_get_log_buffer_size(GUC_CAPTURE_LOG_BUFFER);
> +     read_offset = log_buf_state_local.read_ptr;
> +     write_offset = log_buf_state_local.sampled_write_ptr;
> +     full_count = log_buf_state_local.buffer_full_cnt;
> +
> +     /* Bookkeeping stuff */
> +     guc->log_state[GUC_CAPTURE_LOG_BUFFER].flush += 
> log_buf_state_local.flush_to_file;
> +     new_overflow = intel_guc_check_log_buf_overflow(guc,
> +                                                     
> &guc->log_state[GUC_CAPTURE_LOG_BUFFER],
> +                                                     full_count);
> +
> +     /* Update the state of shared log buffer */
> +     log_buf_state->read_ptr = write_offset;
> +     log_buf_state->flush_to_file = 0;
> +
> +     mutex_unlock(&guc->log_state[GUC_CAPTURE_LOG_BUFFER].lock);
> +
> +     dst_data = guc->capture.priv->out_store.addr;
> +     if (dst_data) {
> +             mutex_lock(&guc->capture.priv->out_store.lock);
> +
> +             /* Now copy the actual logs. */
> +             if (unlikely(new_overflow)) {
> +                     /* copy the whole buffer in case of overflow */
> +                     read_offset = 0;
> +                     write_offset = buffer_size;
> +             } else if (unlikely((read_offset > buffer_size) ||
> +                        (write_offset > buffer_size))) {
> +                     drm_err(&i915->drm, "invalid GuC log capture buffer 
> state!\n");
> +                     /* copy whole buffer as offsets are unreliable */
> +                     read_offset = 0;
> +                     write_offset = buffer_size;
> +             }
> +
> +             /* first copy from the tail end of the GuC log capture buffer */
> +             if (read_offset > write_offset) {
> +                     guc_capture_store_insert(guc, 
> &guc->capture.priv->out_store, src_data,
> +                                              write_offset);
> +                     bytes_to_copy = buffer_size - read_offset;
> +             } else {
> +                     bytes_to_copy = write_offset - read_offset;
> +             }
> +             guc_capture_store_insert(guc, &guc->capture.priv->out_store, 
> src_data + read_offset,
> +                                      bytes_to_copy);
> +
> +             mutex_unlock(&guc->capture.priv->out_store.lock);
> +     }
> +}
> +
> +void intel_guc_capture_store_snapshot(struct intel_guc *guc)
> +{
> +     if (guc->capture.priv)
> +             __guc_capture_store_snapshot_work(guc);
> +}
> +
> +static void guc_capture_store_destroy(struct intel_guc *guc)
> +{
> +     mutex_destroy(&guc->capture.priv->out_store.lock);
> +     guc->capture.priv->out_store.size = 0;
> +     kfree(guc->capture.priv->out_store.addr);
> +     guc->capture.priv->out_store.addr = NULL;
> +}
> +
> +static int guc_capture_store_create(struct intel_guc *guc)
> +{
> +     /*
> +      * Make this interim buffer larger than GuC capture output buffer so 
> that we can absorb
> +      * a little delay when processing the raw capture dumps into text 
> friendly logs
> +      * for the i915_gpu_coredump output
> +      */
> +     size_t max_dump_size;
> +
> +     GEM_BUG_ON(guc->capture.priv->out_store.addr);
> +
> +     max_dump_size = PAGE_ALIGN(intel_guc_capture_output_min_size_est(guc));
> +     max_dump_size = roundup_pow_of_two(max_dump_size);
> +
> +     guc->capture.priv->out_store.addr = kzalloc(max_dump_size, GFP_KERNEL);
> +     if (!guc->capture.priv->out_store.addr)
> +             return -ENOMEM;
> +
> +     guc->capture.priv->out_store.size = max_dump_size;
> +     mutex_init(&guc->capture.priv->out_store.lock);
> +
> +     return 0;
> +}
> +

Reply via email to