On 08-05-2025 04:41, Daniele Ceraolo Spurio wrote:
On 4/29/2025 9:09 AM, Badal Nilawar wrote:Search for late binding firmware binaries and populate the meta data of firmware structures. Signed-off-by: Badal Nilawar <badal.nila...@intel.com> --- drivers/gpu/drm/xe/xe_device.c | 2 + drivers/gpu/drm/xe/xe_late_bind_fw.c | 101 ++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_late_bind_fw.h | 1 + 3 files changed, 101 insertions(+), 3 deletions(-)diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.cindex 86a7b7065122..d83864e7189c 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -892,6 +892,8 @@ int xe_device_probe(struct xe_device *xe) xe_late_bind_init(&xe->late_bind); + xe_late_bind_fw_init(&xe->late_bind);Maybe call this from inside xe_late_bind_init?
Sure.
No late binding firmware will not be available for all card, as suggested firmware_request_nowarn here.+ err = xe_oa_init(xe); if (err) return err;diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c b/drivers/gpu/drm/xe/xe_late_bind_fw.cindex 7981fc500a78..297238fd3d16 100644 --- a/drivers/gpu/drm/xe/xe_late_bind_fw.c +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c @@ -5,6 +5,7 @@ #include <linux/component.h> #include <linux/delay.h> +#include <linux/firmware.h> #include <drm/drm_managed.h> #include <drm/intel/i915_component.h> @@ -13,13 +14,108 @@ #include "xe_device.h" #include "xe_late_bind_fw.h" +#include "xe_pcode.h" +#include "xe_pcode_api.h" -static struct xe_device * -late_bind_to_xe(struct xe_late_bind *late_bind) +static const char * const fw_id_to_name[] = { + [FAN_CONTROL_ID] = "fan_control", + [VOLTAGE_REGULATOR_ID] = "voltage_regulator", + }; + +static const u32 fw_id_to_type[] = { + [FAN_CONTROL_ID] = CSC_LATE_BINDING_TYPE_FAN_CONTROL,+ [VOLTAGE_REGULATOR_ID] = CSC_LATE_BINDING_TYPE_VOLTAGE_REGULATOR+ }; ++static struct xe_device *late_bind_to_xe(struct xe_late_bind *late_bind){ return container_of(late_bind, struct xe_device, late_bind); } +static int late_bind_fw_num_fans(struct xe_late_bind *late_bind) +{ + struct xe_device *xe = late_bind_to_xe(late_bind); + struct xe_tile *root_tile = xe_device_get_root_tile(xe); + u32 uval; + + if (!xe_pcode_read(root_tile,+ PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), &uval, NULL))+ return uval; + else + return 0; +} + +static int late_bind_fw_init(struct xe_late_bind *late_bind, u32 id) +{ + struct xe_device *xe = late_bind_to_xe(late_bind); + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct xe_late_bind_fw *lb_fw; + const struct firmware *fw; + u32 num_fans; + int ret; + + if (!late_bind->component_added) + return 0; + + if (id >= MAX_ID) + return -EINVAL; + + lb_fw = &late_bind->late_bind_fw[id]; + + lb_fw->id = id; + lb_fw->type = fw_id_to_type[id]; + + if (lb_fw->type == CSC_LATE_BINDING_TYPE_FAN_CONTROL) { + num_fans = late_bind_fw_num_fans(late_bind); + drm_dbg(&xe->drm, "Number of Fans: %d\n", num_fans); + if (!num_fans) + return 0; + } + + lb_fw->flags = CSC_LATE_BINDING_FLAGS_IS_PERSISTENT; ++ snprintf(lb_fw->blob_path, sizeof(lb_fw->blob_path), "xe/%s_8086_%04x_%04x_%04x.bin",+ fw_id_to_name[id], pdev->device, + pdev->subsystem_vendor, pdev->subsystem_device); ++ drm_dbg(&xe->drm, "Request late binding firmware %s\n", lb_fw->blob_path);+ ret = request_firmware(&fw, lb_fw->blob_path, xe->drm.dev);Are we expecting late binding firmwares for all possible cards to always be available? because if not (and therefore if this fetch can fail) we should change this to firmware_request_nowarn to avoid throwing errors
+ if (ret) { + drm_err(&xe->drm, "Failed to request %s\n", lb_fw->blob_path);Same as above, if not finding the blob is a valid scenario then this should be a drm_dbg. Maybe even reword to make it clear it's not a failure but just the fact that there is no FW for the card.
I will change drm_err to drm_dbg.
+ lb_fw->valid = false; + return 0; + } + + if (fw->size > MAX_PAYLOAD_SIZE) + lb_fw->payload_size = MAX_PAYLOAD_SIZE;Is this safe? It feels weird to send a truncated firmware for something like voltage regulators. IMO if the firmware is too big we should throw and error and bail out.
Sure, let's throw the error if firmware is too big.
+ else + lb_fw->payload_size = fw->size; + + memcpy(lb_fw->payload, fw->data, lb_fw->payload_size); + release_firmware(fw); + lb_fw->valid = true; + + return 0; +} + +/** + * xe_mei_late_bind_fw_init() - Initialize late bind firmware + *+ * Return: 0 if the initialization was successful, a negative errno otherwise.+ */ +int xe_late_bind_fw_init(struct xe_late_bind *late_bind) +{ + int id; + int ret; + + for (id = 0; id < MAX_ID; id++) { + ret = late_bind_fw_init(late_bind, id); + if (ret) + return ret; + } + return ret; +} + static int xe_late_bind_component_bind(struct device *xe_kdev, struct device *mei_kdev, void *data) { @@ -83,7 +179,6 @@ int xe_late_bind_init(struct xe_late_bind *late_bind) } late_bind->component_added = true; -stray blank line removal
Ok. Thanks, Badal
Daniele/* the component must be removed before unload, so can't use drmm for cleanup */return 0;diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.h b/drivers/gpu/drm/xe/xe_late_bind_fw.hindex 21299de54b47..e88c637b15a6 100644 --- a/drivers/gpu/drm/xe/xe_late_bind_fw.h +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.h @@ -12,5 +12,6 @@ struct xe_late_bind; int xe_late_bind_init(struct xe_late_bind *late_bind); void xe_late_bind_remove(struct xe_late_bind *late_bind); +int xe_late_bind_fw_init(struct xe_late_bind *late_bind); #endif