On 05/03/2026 15:00, Alexandru Dadu wrote: > The firmware will send the context reset notification message as > part of handling hardware recovery (HWR) events. This commit decodes > the message and prints via drm_info(). This eliminates the "Unknown > FWCCB command" message that was previously printed.
Hi Alex, Thanks for taking over this change! Just a couple comments from me. Feel free to ignore the nits, especially if no other changes are required around them. > > Co-authored-by: Sarah Walker <[email protected]> > Signed-off-by: Alexandru Dadu <[email protected]> > --- > drivers/gpu/drm/imagination/Makefile | 1 + > drivers/gpu/drm/imagination/pvr_ccb.c | 6 ++ > drivers/gpu/drm/imagination/pvr_dump.c | 111 > +++++++++++++++++++++ > drivers/gpu/drm/imagination/pvr_dump.h | 17 ++++ > .../gpu/drm/imagination/pvr_rogue_fwif_shared.h | 12 +++ > 5 files changed, 147 insertions(+) > > diff --git a/drivers/gpu/drm/imagination/Makefile > b/drivers/gpu/drm/imagination/Makefile > index f5072f06b4c4..1222a14262e4 100644 > --- a/drivers/gpu/drm/imagination/Makefile > +++ b/drivers/gpu/drm/imagination/Makefile > @@ -8,6 +8,7 @@ powervr-y := \ > pvr_device.o \ > pvr_device_info.o \ > pvr_drv.o \ > + pvr_dump.o \ > pvr_free_list.o \ > pvr_fw.o \ > pvr_fw_meta.o \ > diff --git a/drivers/gpu/drm/imagination/pvr_ccb.c > b/drivers/gpu/drm/imagination/pvr_ccb.c > index 9294b4ba1de7..95d91cde7241 100644 > --- a/drivers/gpu/drm/imagination/pvr_ccb.c > +++ b/drivers/gpu/drm/imagination/pvr_ccb.c > @@ -4,6 +4,7 @@ > #include "pvr_ccb.h" > #include "pvr_device.h" > #include "pvr_drv.h" > +#include "pvr_dump.h" > #include "pvr_free_list.h" > #include "pvr_fw.h" > #include "pvr_gem.h" > @@ -150,6 +151,11 @@ process_fwccb_command(struct pvr_device *pvr_dev, struct > rogue_fwif_fwccb_cmd *c > pvr_free_list_process_grow_req(pvr_dev, > &cmd->cmd_data.cmd_free_list_gs); > break; > > + case ROGUE_FWIF_FWCCB_CMD_CONTEXT_RESET_NOTIFICATION: > + pvr_context_reset_notification(pvr_dev, > + > &cmd->cmd_data.cmd_context_reset_notification); > + break; > + This hunk does not currently apply cleanly on drm-misc-next. It's fairly obvious what the resolution is, but would you mind rebasing if/when you send a v2? > default: > drm_info(from_pvr_device(pvr_dev), "Received unknown FWCCB > command %x\n", > cmd->cmd_type); > diff --git a/drivers/gpu/drm/imagination/pvr_dump.c > b/drivers/gpu/drm/imagination/pvr_dump.c > new file mode 100644 > index 000000000000..4b7ea38a83bd > --- /dev/null > +++ b/drivers/gpu/drm/imagination/pvr_dump.c > @@ -0,0 +1,111 @@ > +// SPDX-License-Identifier: GPL-2.0 OR MIT > +/* Copyright (c) 2026 Imagination Technologies Ltd. */ > + > +#include <drm/drm_print.h> > +#include <linux/types.h> > +#include "pvr_device.h" > +#include "pvr_dump.h" > +#include "pvr_rogue_fwif.h" Nit: our include order in most places is to have the "pvr_*" lines first, then a blank line, then the <*> lines. > + > +static const char * > +get_reset_reason_desc(enum rogue_context_reset_reason reason) > +{ > + switch (reason) { > + case ROGUE_CONTEXT_RESET_REASON_NONE: > + return "None"; > + case ROGUE_CONTEXT_RESET_REASON_GUILTY_LOCKUP: > + return "Guilty lockup"; > + case ROGUE_CONTEXT_RESET_REASON_INNOCENT_LOCKUP: > + return "Innocent lockup"; > + case ROGUE_CONTEXT_RESET_REASON_GUILTY_OVERRUNING: > + return "Guilty overrunning"; > + case ROGUE_CONTEXT_RESET_REASON_INNOCENT_OVERRUNING: > + return "Innocent overrunning"; > + case ROGUE_CONTEXT_RESET_REASON_HARD_CONTEXT_SWITCH: > + return "Hard context switch"; > + case ROGUE_CONTEXT_RESET_REASON_WGP_CHECKSUM: > + return "CDM Mission/safety checksum mismatch"; > + case ROGUE_CONTEXT_RESET_REASON_TRP_CHECKSUM: > + return "TRP checksum mismatch"; > + case ROGUE_CONTEXT_RESET_REASON_GPU_ECC_OK: > + return "GPU ECC error (corrected, OK)"; > + case ROGUE_CONTEXT_RESET_REASON_GPU_ECC_HWR: > + return "GPU ECC error (uncorrected, HWR)"; > + case ROGUE_CONTEXT_RESET_REASON_FW_ECC_OK: > + return "Firmware ECC error (corrected, OK)"; > + case ROGUE_CONTEXT_RESET_REASON_FW_ECC_ERR: > + return "Firmware ECC error (uncorrected, ERR)"; > + case ROGUE_CONTEXT_RESET_REASON_FW_WATCHDOG: > + return "Firmware watchdog"; > + case ROGUE_CONTEXT_RESET_REASON_FW_PAGEFAULT: > + return "Firmware pagefault"; > + case ROGUE_CONTEXT_RESET_REASON_FW_EXEC_ERR: > + return "Firmware execution error"; > + case ROGUE_CONTEXT_RESET_REASON_HOST_WDG_FW_ERR: > + return "Host watchdog"; > + case ROGUE_CONTEXT_GEOM_OOM_DISABLED: > + return "Geometry OOM disabled"; > + > + default: > + return "Unknown"; > + } > +} > + > +static const char * > +get_dm_name(u32 dm) > +{ > + switch (dm) { > + case PVR_FWIF_DM_GP: > + return "General purpose"; > + case PVR_FWIF_DM_2D: > + return "2D"; Have you considered PVR_FWIF_DM_TDM for this branch? They have the same value but a hardware feature determines which is present. Can you either pass the struct pvr_device down and check the feature in this branch to determine the correct string, or, more simply, just update the string to indicate that it could be either (e.g. "2D or TDM"). > + case PVR_FWIF_DM_GEOM: > + return "Geometry"; > + case PVR_FWIF_DM_FRAG: > + return "Fragment"; > + case PVR_FWIF_DM_CDM: > + return "Compute"; > + case PVR_FWIF_DM_RAY: > + return "Raytracing"; > + case PVR_FWIF_DM_GEOM2: > + return "Geometry 2"; > + case PVR_FWIF_DM_GEOM3: > + return "Geometry 3"; > + case PVR_FWIF_DM_GEOM4: > + return "Geometry 4"; > + > + default: > + return "Unknown"; > + } > +} > + > +/** > + * pvr_context_reset_notification() - Handle context reset notification from > FW > + * @pvr_dev: Device pointer. > + * @data: Data provided by FW. > + * > + * This will decode the data structure provided by FW and print the results > via drm_info(). > + */ > +void > +pvr_context_reset_notification(struct pvr_device *pvr_dev, Nit: can we make this pvr_dump_context_reset_notification() or similar to make it clear we're not really doing anything about the context reset in this function? > + struct rogue_fwif_fwccb_cmd_context_reset_data > *data) > +{ > + struct drm_device *drm_dev = from_pvr_device(pvr_dev); > + > + if (data->flags & ROGUE_FWIF_FWCCB_CMD_CONTEXT_RESET_FLAG_ALL_CTXS) { > + drm_info(drm_dev, "Received context reset notification for > all contexts\n"); > + } else { > + drm_info(drm_dev, "Received context reset notification on > context %u\n", > + data->server_common_context_id); > + } > + > + drm_info(drm_dev, " Reset reason=%u (%s)\n", data->reset_reason, I was going to point out that we probably need a cast here since the underlying type of an enum is unspecified and could technically be any integer type, but the larger problem is that we shouldn't have such ambiguity in the firmware interface at all. Can you update struct rogue_fwif_fwccb_cmd_context_reset_data->reset_reason to be a u32 (I suspect 32-bits was the intended size here) so the layout is more explicit? This should be a separate commit with a Fixes: tag pointing to the commit that added the structure. There's another use of that enum in struct rogue_context_reset_reason_data, that should be updated too. I'd annotate the fields with a comment indicating that the valid values for those fields are those of enum rogue_context_reset_reason too. > + get_reset_reason_desc(data->reset_reason)); This line will need a cast after that change though. > + drm_info(drm_dev, " Data Master=%u (%s)\n", data->dm, > get_dm_name(data->dm)); > + drm_info(drm_dev, " Job ref=%u\n", data->reset_job_ref); > + > + if (data->flags & ROGUE_FWIF_FWCCB_CMD_CONTEXT_RESET_FLAG_PF) { > + drm_info(drm_dev, " Page fault occurred, fault > address=%llx\n", > + (unsigned long long)data->fault_address); This cast is unnecessary as %llx is already the correct format specifier for u64[1] (and, by extension, aligned_u64). > + } > +} > diff --git a/drivers/gpu/drm/imagination/pvr_dump.h > b/drivers/gpu/drm/imagination/pvr_dump.h > new file mode 100644 > index 000000000000..450e8b9ebab8 > --- /dev/null > +++ b/drivers/gpu/drm/imagination/pvr_dump.h > @@ -0,0 +1,17 @@ > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ > +/* Copyright (c) 2026 Imagination Technologies Ltd. */ > + > +#ifndef __PVR_DUMP_H__ > +#define __PVR_DUMP_H__ Nit: I think this should just be PVR_DUMP_H. > + > +/* Forward declaration from pvr_device.h. */ > +struct pvr_device; > + > +/* Forward declaration from pvr_rogue_fwif.h. */ > +struct rogue_fwif_fwccb_cmd_context_reset_data; > + > +void > +pvr_context_reset_notification(struct pvr_device *pvr_dev, > + struct rogue_fwif_fwccb_cmd_context_reset_data > *data); > + > +#endif /* __PVR_DUMP_H__ */ > diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_shared.h > b/drivers/gpu/drm/imagination/pvr_rogue_fwif_shared.h > index 6c09c15bf9bd..f622553cdc11 100644 > --- a/drivers/gpu/drm/imagination/pvr_rogue_fwif_shared.h > +++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_shared.h > @@ -236,6 +236,18 @@ enum rogue_context_reset_reason { > ROGUE_CONTEXT_RESET_REASON_INNOCENT_OVERRUNING = 4, > /* Forced reset to ensure scheduling requirements */ > ROGUE_CONTEXT_RESET_REASON_HARD_CONTEXT_SWITCH = 5, > + /* CDM Mission/safety checksum mismatch */ > + ROGUE_CONTEXT_RESET_REASON_WGP_CHECKSUM = 6, > + /* TRP checksum mismatch */ > + ROGUE_CONTEXT_RESET_REASON_TRP_CHECKSUM = 7, > + /* GPU ECC error (corrected, OK) */ > + ROGUE_CONTEXT_RESET_REASON_GPU_ECC_OK = 8, > + /* GPU ECC error (uncorrected, HWR) */ > + ROGUE_CONTEXT_RESET_REASON_GPU_ECC_HWR = 9, > + /* FW ECC error (corrected, OK) */ > + ROGUE_CONTEXT_RESET_REASON_FW_ECC_OK = 10, > + /* FW ECC error (uncorrected, ERR) */ > + ROGUE_CONTEXT_RESET_REASON_FW_ECC_ERR = 11, Filling out the rest of this enum feels useful even without the rest of the patch. Maybe this should be split out into a separete patch before this one? Chers, Matt [1]: https://www.kernel.org/doc/html/latest/core-api/printk-formats.html > /* FW Safety watchdog triggered */ > ROGUE_CONTEXT_RESET_REASON_FW_WATCHDOG = 12, > /* FW page fault (no HWR) */ > > --- > base-commit: 68b271a3a94cfd6c7695a96b6398b52feb89e2c2 > change-id: > 20260305-b4-firmware-context-reset-notification-handling-694a1b5e6b8c > > Best regards, > -- > Alexandru Dadu <[email protected]> > -- Matt Coster E: [email protected]
OpenPGP_signature.asc
Description: OpenPGP digital signature
