-----Original Message----- From: Intel-xe <intel-xe-boun...@lists.freedesktop.org> On Behalf Of Matthew Brost Sent: Thursday, April 3, 2025 1:27 PM To: intel...@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Subject: [PATCH v2 1/4] drm/xe: Add devcoredump chunking > > Chunk devcoredump into 1.5G pieces to avoid hitting the kvmalloc limit > of 2G. Simple algorithm reads 1.5G at a time in xe_devcoredump_read > callback as needed. > > Signed-off-by: Matthew Brost <matthew.br...@intel.com>
I have no issues with this patch, though you should maybe get a second opinion. Reviewed-by: Jonathan Cavitt <jonathan.cav...@intel.com> -Jonathan Cavitt > --- > drivers/gpu/drm/xe/xe_devcoredump.c | 59 ++++++++++++++++++----- > drivers/gpu/drm/xe/xe_devcoredump_types.h | 2 + > drivers/gpu/drm/xe/xe_guc_hwconfig.c | 2 +- > 3 files changed, 50 insertions(+), 13 deletions(-) > > diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c > b/drivers/gpu/drm/xe/xe_devcoredump.c > index 81b9d9bb3f57..a9e618abf8ac 100644 > --- a/drivers/gpu/drm/xe/xe_devcoredump.c > +++ b/drivers/gpu/drm/xe/xe_devcoredump.c > @@ -80,7 +80,8 @@ static struct xe_guc *exec_queue_to_guc(struct > xe_exec_queue *q) > return &q->gt->uc.guc; > } > > -static ssize_t __xe_devcoredump_read(char *buffer, size_t count, > +static ssize_t __xe_devcoredump_read(char *buffer, ssize_t count, > + ssize_t start, > struct xe_devcoredump *coredump) > { > struct xe_device *xe; > @@ -94,7 +95,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t > count, > ss = &coredump->snapshot; > > iter.data = buffer; > - iter.start = 0; > + iter.start = start; > iter.remain = count; > > p = drm_coredump_printer(&iter); > @@ -168,6 +169,8 @@ static void xe_devcoredump_snapshot_free(struct > xe_devcoredump_snapshot *ss) > ss->vm = NULL; > } > > +#define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G) > + > static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, > size_t count, void *data, size_t datalen) > { > @@ -183,6 +186,9 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t > offset, > /* Ensure delayed work is captured before continuing */ > flush_work(&ss->work); > > + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) > + xe_pm_runtime_get(gt_to_xe(ss->gt)); > + > mutex_lock(&coredump->lock); > > if (!ss->read.buffer) { > @@ -195,12 +201,26 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t > offset, > return 0; > } > > + if (offset >= ss->read.chunk_position + XE_DEVCOREDUMP_CHUNK_MAX || > + offset < 
ss->read.chunk_position) { > + ss->read.chunk_position = > + ALIGN_DOWN(offset, XE_DEVCOREDUMP_CHUNK_MAX); > + > + __xe_devcoredump_read(ss->read.buffer, > + XE_DEVCOREDUMP_CHUNK_MAX, > + ss->read.chunk_position, coredump); > + } > + > byte_copied = count < ss->read.size - offset ? count : > ss->read.size - offset; > - memcpy(buffer, ss->read.buffer + offset, byte_copied); > + memcpy(buffer, ss->read.buffer + > + (offset % XE_DEVCOREDUMP_CHUNK_MAX), byte_copied); > > mutex_unlock(&coredump->lock); > > + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) > + xe_pm_runtime_put(gt_to_xe(ss->gt)); > + > return byte_copied; > } > > @@ -254,17 +274,32 @@ static void xe_devcoredump_deferred_snap_work(struct > work_struct *work) > xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); > xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); > > - xe_pm_runtime_put(xe); > + ss->read.chunk_position = 0; > > /* Calculate devcoredump size */ > - ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); > - > - ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); > - if (!ss->read.buffer) > - return; > + ss->read.size = __xe_devcoredump_read(NULL, LONG_MAX, 0, coredump); > + > + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) { > + ss->read.buffer = kvmalloc(XE_DEVCOREDUMP_CHUNK_MAX, > + GFP_USER); > + if (!ss->read.buffer) > + goto put_pm; > + > + __xe_devcoredump_read(ss->read.buffer, > + XE_DEVCOREDUMP_CHUNK_MAX, > + 0, coredump); > + } else { > + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); > + if (!ss->read.buffer) > + goto put_pm; > + > + __xe_devcoredump_read(ss->read.buffer, ss->read.size, 0, > + coredump); > + xe_devcoredump_snapshot_free(ss); > + } > > - __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); > - xe_devcoredump_snapshot_free(ss); > +put_pm: > + xe_pm_runtime_put(xe); > } > > static void devcoredump_snapshot(struct xe_devcoredump *coredump, > @@ -425,7 +460,7 @@ void xe_print_blob_ascii85(struct drm_printer *p, const > char *prefix, char suffi > if 
(offset & 3) > drm_printf(p, "Offset not word aligned: %zu", offset); > > - line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_KERNEL); > + line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_ATOMIC); > if (!line_buff) { > drm_printf(p, "Failed to allocate line buffer\n"); > return; > diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h > b/drivers/gpu/drm/xe/xe_devcoredump_types.h > index 1a1d16a96b2d..a174385a6d83 100644 > --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h > +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h > @@ -66,6 +66,8 @@ struct xe_devcoredump_snapshot { > struct { > /** @read.size: size of devcoredump in human readable format */ > ssize_t size; > + /** @read.chunk_position: position of devcoredump chunk */ > + ssize_t chunk_position; > /** @read.buffer: buffer of devcoredump in human readable > format */ > char *buffer; > } read; > diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c > b/drivers/gpu/drm/xe/xe_guc_hwconfig.c > index af2c817d552c..21403a250834 100644 > --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c > +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c > @@ -175,7 +175,7 @@ int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 > attribute, u32 *val) > if (num_dw == 0) > return -EINVAL; > > - hwconfig = kzalloc(size, GFP_KERNEL); > + hwconfig = kzalloc(size, GFP_ATOMIC); > if (!hwconfig) > return -ENOMEM; > > -- > 2.34.1 > >