-----Original Message----- From: Intel-xe <intel-xe-boun...@lists.freedesktop.org> On Behalf Of Matthew Brost Sent: Thursday, April 3, 2025 1:27 PM To: intel...@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Subject: [PATCH v2 1/4] drm/xe: Add devcoredump chunking > > Chunk devcoredump into 1.5G pieces to avoid hitting the kvmalloc limit > of 2G. Simple algorithm reads 1.5G at a time in xe_devcoredump_read > callback as needed. > > Signed-off-by: Matthew Brost <matthew.br...@intel.com>
I have no issues with this patch, though you should maybe get a second opinion. Reviewed-by: Jonathan Cavitt <jonathan.cav...@intel.com> -Jonathan Cavitt > --- > drivers/gpu/drm/xe/xe_devcoredump.c | 59 ++++++++++++++++++----- > drivers/gpu/drm/xe/xe_devcoredump_types.h | 2 + > drivers/gpu/drm/xe/xe_guc_hwconfig.c | 2 +- > 3 files changed, 50 insertions(+), 13 deletions(-) > > diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c > b/drivers/gpu/drm/xe/xe_devcoredump.c > index 81b9d9bb3f57..a9e618abf8ac 100644 > --- a/drivers/gpu/drm/xe/xe_devcoredump.c > +++ b/drivers/gpu/drm/xe/xe_devcoredump.c > @@ -80,7 +80,8 @@ static struct xe_guc *exec_queue_to_guc(struct > xe_exec_queue *q) > return &q->gt->uc.guc; > } > > -static ssize_t __xe_devcoredump_read(char *buffer, size_t count, > +static ssize_t __xe_devcoredump_read(char *buffer, ssize_t count, > + ssize_t start, > struct xe_devcoredump *coredump) > { > struct xe_device *xe; > @@ -94,7 +95,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t > count, > ss = &coredump->snapshot; > > iter.data = buffer; > - iter.start = 0; > + iter.start = start; > iter.remain = count; > > p = drm_coredump_printer(&iter); > @@ -168,6 +169,8 @@ static void xe_devcoredump_snapshot_free(struct > xe_devcoredump_snapshot *ss) > ss->vm = NULL; > } > > +#define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G) > + > static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, > size_t count, void *data, size_t datalen) > { > @@ -183,6 +186,9 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t > offset, > /* Ensure delayed work is captured before continuing */ > flush_work(&ss->work); > > + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) > + xe_pm_runtime_get(gt_to_xe(ss->gt)); > + > mutex_lock(&coredump->lock); > > if (!ss->read.buffer) { > @@ -195,12 +201,26 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t > offset, > return 0; > } > > + if (offset >= ss->read.chunk_position + XE_DEVCOREDUMP_CHUNK_MAX || > + offset < 
ss->read.chunk_position) { > + ss->read.chunk_position = > + ALIGN_DOWN(offset, XE_DEVCOREDUMP_CHUNK_MAX); > + > + __xe_devcoredump_read(ss->read.buffer, > + XE_DEVCOREDUMP_CHUNK_MAX, > + ss->read.chunk_position, coredump); > + } > + > byte_copied = count < ss->read.size - offset ? count : > ss->read.size - offset; > - memcpy(buffer, ss->read.buffer + offset, byte_copied); > + memcpy(buffer, ss->read.buffer + > + (offset % XE_DEVCOREDUMP_CHUNK_MAX), byte_copied); > > mutex_unlock(&coredump->lock); > > + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) > + xe_pm_runtime_put(gt_to_xe(ss->gt)); > + > return byte_copied; > } > > @@ -254,17 +274,32 @@ static void xe_devcoredump_deferred_snap_work(struct > work_struct *work) > xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); > xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); > > - xe_pm_runtime_put(xe); > + ss->read.chunk_position = 0; > > /* Calculate devcoredump size */ > - ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); > - > - ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); > - if (!ss->read.buffer) > - return; > + ss->read.size = __xe_devcoredump_read(NULL, LONG_MAX, 0, coredump); > + > + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) { > + ss->read.buffer = kvmalloc(XE_DEVCOREDUMP_CHUNK_MAX, > + GFP_USER); > + if (!ss->read.buffer) > + goto put_pm; > + > + __xe_devcoredump_read(ss->read.buffer, > + XE_DEVCOREDUMP_CHUNK_MAX, > + 0, coredump); > + } else { > + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); > + if (!ss->read.buffer) > + goto put_pm; > + > + __xe_devcoredump_read(ss->read.buffer, ss->read.size, 0, > + coredump); > + xe_devcoredump_snapshot_free(ss); > + } > > - __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); > - xe_devcoredump_snapshot_free(ss); > +put_pm: > + xe_pm_runtime_put(xe); > } > > static void devcoredump_snapshot(struct xe_devcoredump *coredump, > @@ -425,7 +460,7 @@ void xe_print_blob_ascii85(struct drm_printer *p, const > char *prefix, char suffi > if 
(offset & 3) > drm_printf(p, "Offset not word aligned: %zu", offset); > > - line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_KERNEL); > + line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_ATOMIC); > if (!line_buff) { > drm_printf(p, "Failed to allocate line buffer\n"); > return; > diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h > b/drivers/gpu/drm/xe/xe_devcoredump_types.h > index 1a1d16a96b2d..a174385a6d83 100644 > --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h > +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h > @@ -66,6 +66,8 @@ struct xe_devcoredump_snapshot { > struct { > /** @read.size: size of devcoredump in human readable format */ > ssize_t size; > + /** @read.chunk_position: position of devcoredump chunk */ > + ssize_t chunk_position; > /** @read.buffer: buffer of devcoredump in human readable > format */ > char *buffer; > } read; > diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c > b/drivers/gpu/drm/xe/xe_guc_hwconfig.c > index af2c817d552c..21403a250834 100644 > --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c > +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c > @@ -175,7 +175,7 @@ int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 > attribute, u32 *val) > if (num_dw == 0) > return -EINVAL; > > - hwconfig = kzalloc(size, GFP_KERNEL); > + hwconfig = kzalloc(size, GFP_ATOMIC); > if (!hwconfig) > return -ENOMEM; > > -- > 2.34.1 > >