On 04/12/2023 17:33, Boris Brezillon wrote:
> Contains everything that's FW related, that includes the code dealing
> with the microcontroller unit (MCU) that's running the FW, and anything
> related to allocating memory shared between the FW and the CPU.
> 
> A few global FW events are processed in the IRQ handler, the rest is
> forwarded to the scheduler, since scheduling is the primary reason for
> the FW existence, and also the main source of FW <-> kernel
> interactions.
> 
> v3:
> - Make the FW path more future-proof (Liviu)
> - Use one waitqueue for all FW events
> - Simplify propagation of FW events to the scheduler logic
> - Drop the panthor_fw_mem abstraction and use panthor_kernel_bo instead
> - Account for the panthor_vm changes
> - Replace magic number with 0x7fffffff with ~0 to better signify that
>   it's the maximum permitted value.
> - More accurate rounding when computing the firmware timeout.
> - Add a 'sub iterator' helper function. This also adds a check that a
>   firmware entry doesn't overflow the firmware image.
> - Drop __packed from FW structures, natural alignment is good enough.
> - Other minor code improvements.
> 
> Signed-off-by: Boris Brezillon <boris.brezil...@collabora.com>
> Signed-off-by: Steven Price <steven.pr...@arm.com>
> ---
>  drivers/gpu/drm/panthor/panthor_fw.c | 1332 ++++++++++++++++++++++++++
>  drivers/gpu/drm/panthor/panthor_fw.h |  504 ++++++++++
>  2 files changed, 1836 insertions(+)
>  create mode 100644 drivers/gpu/drm/panthor/panthor_fw.c
>  create mode 100644 drivers/gpu/drm/panthor/panthor_fw.h
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_fw.c 
> b/drivers/gpu/drm/panthor/panthor_fw.c
> new file mode 100644
> index 000000000000..85afe769f567
> --- /dev/null
> +++ b/drivers/gpu/drm/panthor/panthor_fw.c

...
> +static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
> +                                      const struct firmware *fw,
> +                                      struct panthor_fw_binary_iter *iter,
> +                                      u32 ehdr)
> +{
> +     struct panthor_fw_binary_section_entry_hdr hdr;
> +     struct panthor_fw_section *section;
> +     u32 section_size;
> +     u32 name_len;
> +     int ret;
> +
> +     ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
> +     if (ret)
> +             return ret;
> +
> +     if (hdr.data.end < hdr.data.start) {
> +             drm_err(&ptdev->base, "Firmware corrupted, data.end < 
> data.start (0x%x < 0x%x)\n",
> +                     hdr.data.end, hdr.data.start);
> +             return -EINVAL;
> +     }
> +
> +     if (hdr.va.end < hdr.va.start) {
> +             drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < 
> hdr.va.start (0x%x < 0x%x)\n",
> +                     hdr.va.end, hdr.va.start);
> +             return -EINVAL;
> +     }
> +
> +     if (hdr.data.end > fw->size) {
> +             drm_err(&ptdev->base, "Firmware corrupted, file truncated? 
> data_end=0x%x > fw size=0x%zx\n",
> +                     hdr.data.end, fw->size);
> +             return -EINVAL;
> +     }
> +
> +     if ((hdr.va.start & ~PAGE_MASK) != 0 ||
> +         (hdr.va.end & ~PAGE_MASK) != 0) {
> +             drm_err(&ptdev->base, "Firmware corrupted, virtual addresses 
> not page aligned: 0x%x-0x%x\n",
> +                     hdr.va.start, hdr.va.end);
> +             return -EINVAL;
> +     }
> +
> +     if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) {
> +             drm_err(&ptdev->base, "Firmware contains interface with 
> unsupported flags (0x%x)\n",
> +                     hdr.flags);
> +             return -EINVAL;
> +     }
> +
> +     if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) {
> +             drm_warn(&ptdev->base,
> +                      "Firmware protected mode entry not be supported, 
> ignoring");
> +             return 0;
> +     }
> +
> +     if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
> +         !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) {
> +             drm_err(&ptdev->base,
> +                     "Interface at 0x%llx must be shared", 
> CSF_MCU_SHARED_REGION_START);
> +             return -EINVAL;
> +     }
> +
> +     name_len = iter->size - iter->offset;
> +
> +     section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
> +     if (!section)
> +             return -ENOMEM;
> +
> +     list_add_tail(&section->node, &ptdev->fw->sections);
> +     section->flags = hdr.flags;
> +     section->data.size = hdr.data.end - hdr.data.start;
> +
> +     if (section->data.size > 0) {
> +             void *data = drmm_kmalloc(&ptdev->base, section->data.size, 
> GFP_KERNEL);
> +
> +             if (!data)
> +                     return -ENOMEM;
> +
> +             memcpy(data, fw->data + hdr.data.start, section->data.size);
> +             section->data.buf = data;
> +     }
> +
> +     if (name_len > 0) {
> +             char *name = drmm_kmalloc(&ptdev->base, name_len + 1, 
> GFP_KERNEL);
> +
> +             if (!name)
> +                     return -ENOMEM;
> +
> +             memcpy(name, iter->data + iter->offset, name_len);
> +             name[name_len] = '\0';
> +             section->name = name;
> +     }
> +
> +     section_size = hdr.va.end - hdr.va.start;
> +     if (section_size) {
> +             u32 cache_mode = hdr.flags & 
> CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK;
> +             struct panthor_gem_object *bo;
> +             u32 vm_map_flags = 0;
> +             struct sg_table *sgt;
> +             u64 va = hdr.va.start;
> +
> +             if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
> +                     vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;
> +
> +             if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX))
> +                     vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;
> +
> +             /* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are 
> mapped to
> +              * non-cacheable for now. We might want to introduce a new
> +              * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
> +              * memory and is currently not used by our driver) for
> +              * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit
> +              * of IO-coherent systems.
> +              */
> +             if (cache_mode != 
> CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED)
> +                     vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;
> +
> +             section->mem = panthor_kernel_bo_create(ptdev, 
> panthor_fw_vm(ptdev),
> +                                                     section_size,
> +                                                     DRM_PANTHOR_BO_NO_MMAP,
> +                                                     vm_map_flags, va);
> +             if (IS_ERR(section->mem))
> +                     return PTR_ERR(section->mem);

There's a small bug here - if panthor_kernel_bo_create() fails, section->mem 
will be
left containing an error value, not NULL or a valid pointer.

This means that on rmmod, panthor_fw_unplug() will try and free the BO even 
though it
was never allocated, causing a kernel panic.

The solution is obviously to only assign section->mem if the allocation 
actually succeeded.

However I also wonder if the list_add_tail() call should be moved to later in 
the function,
after we know everything else has succeeded, or if this function needs some 
more error
handling in general.

> +
> +             if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != 
> hdr.va.start))
> +                     return -EINVAL;
> +
> +             if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) {
> +                     ret = panthor_kernel_bo_vmap(section->mem);
> +                     if (ret)
> +                             return ret;
> +             }
> +
> +             panthor_fw_init_section_mem(ptdev, section);
> +
> +             bo = to_panthor_bo(section->mem->obj);
> +             sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
> +             if (IS_ERR(sgt))
> +                     return PTR_ERR(section->mem);
> +
> +             dma_sync_sgtable_for_device(ptdev->base.dev, sgt, 
> DMA_TO_DEVICE);
> +     }
> +
> +     if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
> +             ptdev->fw->shared_section = section;
> +
> +     return 0;
> +}
> +

...

> +/**
> + * panthor_fw_unplug() - Called when the device is unplugged.
> + * @ptdev: Device.
> + *
> + * This function must make sure all pending operations are flushed before
> + * will release device resources, thus preventing any interaction with
> + * the HW.
> + *
> + * If there is still FW-related work running after this function returns,
> + * they must use drm_dev_{enter,exit}() and skip any HW access when
> + * drm_dev_enter() returns false.
> + */
> +void panthor_fw_unplug(struct panthor_device *ptdev)
> +{
> +     struct panthor_fw_section *section;
> +
> +     cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
> +
> +     /* Make sure the IRQ handler can be called after that point. */
> +     if (ptdev->fw->irq.irq)
> +             panthor_job_irq_suspend(&ptdev->fw->irq);
> +
> +     panthor_fw_stop(ptdev);
> +
> +     if (ptdev->fw->vm)
> +             panthor_vm_idle(ptdev->fw->vm);
> +
> +     list_for_each_entry(section, &ptdev->fw->sections, node)
> +             panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), section->mem);

Here's where we destroy the potentially invalid section->mem.

Note that the issues are twofold:
Firstly, section->mem could be an error pointer as mentioned earlier.
Secondly, panthor_kernel_bo_destroy() doesn't actually check for error values 
or even NULL.

It would probably be worth adding NULL checks to panthor_kernel_bo_destroy() 
and panthor_kernel_bo_vunmap() to protect against this. 

> +
> +     panthor_vm_put(ptdev->fw->vm);
> +
> +     panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
> +}
> +

Cheers,
Chris

Reply via email to