On Wed, Sep 3, 2025 at 2:49 PM Oleksandr Tyshchenko <olekst...@gmail.com> wrote:
>
>
>
> On 03.09.25 13:25, Mykola Kvach wrote:
> > Hi Oleksandr,
>
> Hello Mykola
>
> >
> > On Wed, Sep 3, 2025 at 1:01 PM Oleksandr Tyshchenko
> > <oleksandr_tyshche...@epam.com> wrote:
> >>
> >>
> >>
> >> On 02.09.25 01:10, Mykola Kvach wrote:
> >>
> >> Hello Mykola
> >>
> >>
> >>> From: Oleksandr Tyshchenko <oleksandr_tyshche...@epam.com>
> >>>
> >>> Store and restore active context and micro-TLB registers.
> >>>
> >>> Tested on R-Car H3 Starter Kit.
> >>>
> >>> Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshche...@epam.com>
> >>> Signed-off-by: Mykola Kvach <mykola_kv...@epam.com>
> >>> ---
> >>> Changes in V6:
> >>> - refactor code related to hw_register struct, from now it's called
> >>>     ipmmu_reg_ctx
> >>
> >> The updated version looks good, thanks. However, I have one
> >> concern/request ...
> >>
> >>> ---
> >>>    xen/drivers/passthrough/arm/ipmmu-vmsa.c | 257 +++++++++++++++++++++++
> >>>    1 file changed, 257 insertions(+)
> >>>
> >>> diff --git a/xen/drivers/passthrough/arm/ipmmu-vmsa.c b/xen/drivers/passthrough/arm/ipmmu-vmsa.c
> >>> index ea9fa9ddf3..0973559861 100644
> >>> --- a/xen/drivers/passthrough/arm/ipmmu-vmsa.c
> >>> +++ b/xen/drivers/passthrough/arm/ipmmu-vmsa.c
> >>> @@ -71,6 +71,8 @@
> >>>    })
> >>>    #endif
> >>>
> >>> +#define dev_dbg(dev, fmt, ...)    \
> >>> +    dev_print(dev, XENLOG_DEBUG, fmt, ## __VA_ARGS__)
> >>>    #define dev_info(dev, fmt, ...)    \
> >>>        dev_print(dev, XENLOG_INFO, fmt, ## __VA_ARGS__)
> >>>    #define dev_warn(dev, fmt, ...)    \
> >>> @@ -130,6 +132,24 @@ struct ipmmu_features {
> >>>        unsigned int imuctr_ttsel_mask;
> >>>    };
> >>>
> >>> +#ifdef CONFIG_SYSTEM_SUSPEND
> >>> +
> >>> +struct ipmmu_reg_ctx {
> >>> +    unsigned int imttlbr0;
> >>> +    unsigned int imttubr0;
> >>> +    unsigned int imttbcr;
> >>> +    unsigned int imctr;
> >>> +};
> >>> +
> >>> +struct ipmmu_vmsa_backup {
> >>> +    struct device *dev;
> >>> +    unsigned int *utlbs_val;
> >>> +    unsigned int *asids_val;
> >>> +    struct list_head list;
> >>> +};
> >>> +
> >>> +#endif
> >>> +
> >>>    /* Root/Cache IPMMU device's information */
> >>>    struct ipmmu_vmsa_device {
> >>>        struct device *dev;
> >>> @@ -142,6 +162,9 @@ struct ipmmu_vmsa_device {
> >>>        struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX];
> >>>        unsigned int utlb_refcount[IPMMU_UTLB_MAX];
> >>>        const struct ipmmu_features *features;
> >>> +#ifdef CONFIG_SYSTEM_SUSPEND
> >>> +    struct ipmmu_reg_ctx *reg_backup[IPMMU_CTX_MAX];
> >>> +#endif
> >>>    };
> >>>
> >>>    /*
> >>> @@ -547,6 +570,222 @@ static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu,
> >>>        spin_unlock_irqrestore(&mmu->lock, flags);
> >>>    }
> >>>
> >>> +#ifdef CONFIG_SYSTEM_SUSPEND
> >>> +
> >>> +static DEFINE_SPINLOCK(ipmmu_devices_backup_lock);
> >>> +static LIST_HEAD(ipmmu_devices_backup);
> >>> +
> >>> +static struct ipmmu_reg_ctx root_pgtable[IPMMU_CTX_MAX];
> >>> +
> >>> +static uint32_t ipmmu_imuasid_read(struct ipmmu_vmsa_device *mmu,
> >>> +                                   unsigned int utlb)
> >>> +{
> >>> +    return ipmmu_read(mmu, ipmmu_utlb_reg(mmu, IMUASID(utlb)));
> >>> +}
> >>> +
> >>> +static void ipmmu_utlbs_backup(struct ipmmu_vmsa_device *mmu)
> >>> +{
> >>> +    struct ipmmu_vmsa_backup *backup_data;
> >>> +
> >>> +    dev_dbg(mmu->dev, "Handle micro-TLBs backup\n");
> >>> +
> >>> +    spin_lock(&ipmmu_devices_backup_lock);
> >>> +
> >>> +    list_for_each_entry( backup_data, &ipmmu_devices_backup, list )
> >>> +    {
> >>> +        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(backup_data->dev);
> >>> +        unsigned int i;
> >>> +
> >>> +        if ( to_ipmmu(backup_data->dev) != mmu )
> >>> +            continue;
> >>> +
> >>> +        for ( i = 0; i < fwspec->num_ids; i++ )
> >>> +        {
> >>> +            unsigned int utlb = fwspec->ids[i];
> >>> +
> >>> +            backup_data->asids_val[i] = ipmmu_imuasid_read(mmu, utlb);
> >>> +            backup_data->utlbs_val[i] = ipmmu_imuctr_read(mmu, utlb);
> >>> +        }
> >>> +    }
> >>> +
> >>> +    spin_unlock(&ipmmu_devices_backup_lock);
> >>> +}
> >>> +
> >>> +static void ipmmu_utlbs_restore(struct ipmmu_vmsa_device *mmu)
> >>> +{
> >>> +    struct ipmmu_vmsa_backup *backup_data;
> >>> +
> >>> +    dev_dbg(mmu->dev, "Handle micro-TLBs restore\n");
> >>> +
> >>> +    spin_lock(&ipmmu_devices_backup_lock);
> >>> +
> >>> +    list_for_each_entry( backup_data, &ipmmu_devices_backup, list )
> >>> +    {
> >>> +        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(backup_data->dev);
> >>> +        unsigned int i;
> >>> +
> >>> +        if ( to_ipmmu(backup_data->dev) != mmu )
> >>> +            continue;
> >>> +
> >>> +        for ( i = 0; i < fwspec->num_ids; i++ )
> >>> +        {
> >>> +            unsigned int utlb = fwspec->ids[i];
> >>> +
> >>> +            ipmmu_imuasid_write(mmu, utlb, backup_data->asids_val[i]);
> >>> +            ipmmu_imuctr_write(mmu, utlb, backup_data->utlbs_val[i]);
> >>> +        }
> >>> +    }
> >>> +
> >>> +    spin_unlock(&ipmmu_devices_backup_lock);
> >>> +}
> >>> +
> >>> +static void ipmmu_domain_backup_context(struct ipmmu_vmsa_domain *domain)
> >>> +{
> >>> +    struct ipmmu_vmsa_device *mmu = domain->mmu->root;
> >>> +    struct ipmmu_reg_ctx *regs = mmu->reg_backup[domain->context_id];
> >>> +
> >>> +    dev_dbg(mmu->dev, "Handle domain context %u backup\n", domain->context_id);
> >>> +
> >>> +    regs->imttlbr0 = ipmmu_ctx_read_root(domain, IMTTLBR0);
> >>> +    regs->imttubr0 = ipmmu_ctx_read_root(domain, IMTTUBR0);
> >>> +    regs->imttbcr  = ipmmu_ctx_read_root(domain, IMTTBCR);
> >>> +    regs->imctr    = ipmmu_ctx_read_root(domain, IMCTR);
> >>> +}
> >>> +
> >>> +static void ipmmu_domain_restore_context(struct ipmmu_vmsa_domain *domain)
> >>> +{
> >>> +    struct ipmmu_vmsa_device *mmu = domain->mmu->root;
> >>> +    struct ipmmu_reg_ctx *regs  = mmu->reg_backup[domain->context_id];
> >>> +
> >>> +    dev_dbg(mmu->dev, "Handle domain context %u restore\n", domain->context_id);
> >>> +
> >>> +    ipmmu_ctx_write_root(domain, IMTTLBR0, regs->imttlbr0);
> >>> +    ipmmu_ctx_write_root(domain, IMTTUBR0, regs->imttubr0);
> >>> +    ipmmu_ctx_write_root(domain, IMTTBCR,  regs->imttbcr);
> >>> +    ipmmu_ctx_write_all(domain,  IMCTR,    regs->imctr | IMCTR_FLUSH);
> >>> +}
> >>> +
> >>> +/*
> >>> + * Xen: Unlike Linux implementation, Xen uses a single driver instance
> >>> + * for handling all IPMMUs. There is no framework for ipmmu_suspend/resume
> >>> + * callbacks to be invoked for each IPMMU device. So, we need to iterate
> >>> + * through all registered IPMMUs performing required actions.
> >>> + *
> >>> + * Also take care of restoring special settings, such as translation
> >>> + * table format, etc.
> >>> + */
> >>> +static int __must_check ipmmu_suspend(void)
> >>> +{
> >>> +    struct ipmmu_vmsa_device *mmu;
> >>> +
> >>> +    if ( !iommu_enabled )
> >>> +        return 0;
> >>> +
> >>> +    printk(XENLOG_DEBUG "ipmmu: Suspending ...\n");
> >>> +
> >>> +    spin_lock(&ipmmu_devices_lock);
> >>> +
> >>> +    list_for_each_entry( mmu, &ipmmu_devices, list )
> >>> +    {
> >>> +        if ( ipmmu_is_root(mmu) )
> >>> +        {
> >>> +            unsigned int i;
> >>> +
> >>> +            for ( i = 0; i < mmu->num_ctx; i++ )
> >>> +            {
> >>> +                if ( !mmu->domains[i] )
> >>> +                    continue;
> >>> +                ipmmu_domain_backup_context(mmu->domains[i]);
> >>> +            }
> >>> +        }
> >>> +        else
> >>> +            ipmmu_utlbs_backup(mmu);
> >>> +    }
> >>> +
> >>> +    spin_unlock(&ipmmu_devices_lock);
> >>> +
> >>> +    return 0;
> >>> +}
> >>> +
> >>> +static void ipmmu_resume(void)
> >>> +{
> >>> +    struct ipmmu_vmsa_device *mmu;
> >>> +
> >>> +    if ( !iommu_enabled )
> >>> +        return;
> >>> +
> >>> +    printk(XENLOG_DEBUG "ipmmu: Resuming ...\n");
> >>> +
> >>> +    spin_lock(&ipmmu_devices_lock);
> >>> +
> >>> +    list_for_each_entry( mmu, &ipmmu_devices, list )
> >>> +    {
> >>> +        uint32_t reg;
> >>> +
> >>> +        /* Do not use security group function */
> >>> +        reg = IMSCTLR + mmu->features->control_offset_base;
> >>> +        ipmmu_write(mmu, reg, ipmmu_read(mmu, reg) & ~IMSCTLR_USE_SECGRP);
> >>> +
> >>> +        if ( ipmmu_is_root(mmu) )
> >>> +        {
> >>> +            unsigned int i;
> >>> +
> >>> +            /* Use stage 2 translation table format */
> >>> +            reg = IMSAUXCTLR + mmu->features->control_offset_base;
> >>> +            ipmmu_write(mmu, reg, ipmmu_read(mmu, reg) | IMSAUXCTLR_S2PTE);
> >>> +
> >>> +            for ( i = 0; i < mmu->num_ctx; i++ )
> >>> +            {
> >>> +                if ( !mmu->domains[i] )
> >>> +                    continue;
> >>> +                ipmmu_domain_restore_context(mmu->domains[i]);
> >>> +            }
> >>> +        }
> >>> +        else
> >>> +            ipmmu_utlbs_restore(mmu);
> >>> +    }
> >>> +
> >>> +    spin_unlock(&ipmmu_devices_lock);
> >>> +}
> >>> +
> >>> +static int ipmmu_alloc_ctx_suspend(struct device *dev)
> >>> +{
> >>> +    struct ipmmu_vmsa_backup *backup_data;
> >>> +    unsigned int *utlbs_val, *asids_val;
> >>> +    struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> >>> +
> >>> +    utlbs_val = xzalloc_array(unsigned int, fwspec->num_ids);
> >>> +    if ( !utlbs_val )
> >>> +        return -ENOMEM;
> >>> +
> >>> +    asids_val = xzalloc_array(unsigned int, fwspec->num_ids);
> >>> +    if ( !asids_val )
> >>> +    {
> >>> +        xfree(utlbs_val);
> >>> +        return -ENOMEM;
> >>> +    }
> >>> +
> >>> +    backup_data = xzalloc(struct ipmmu_vmsa_backup);
> >>> +    if ( !backup_data )
> >>> +    {
> >>> +        xfree(utlbs_val);
> >>> +        xfree(asids_val);
> >>> +        return -ENOMEM;
> >>> +    }
> >>> +
> >>> +    backup_data->dev = dev;
> >>> +    backup_data->utlbs_val = utlbs_val;
> >>> +    backup_data->asids_val = asids_val;
> >>> +
> >>> +    spin_lock(&ipmmu_devices_backup_lock);
> >>> +    list_add(&backup_data->list, &ipmmu_devices_backup);
> >>> +    spin_unlock(&ipmmu_devices_backup_lock);
> >>> +
> >>> +    return 0;
> >>> +}
> >>> +
> >>> +#endif /* CONFIG_SYSTEM_SUSPEND */
> >>> +
> >>>    static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
> >>>    {
> >>>        uint64_t ttbr;
> >>> @@ -559,6 +798,9 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
> >>>            return ret;
> >>>
> >>>        domain->context_id = ret;
> >>> +#ifdef CONFIG_SYSTEM_SUSPEND
> >>> +    domain->mmu->root->reg_backup[ret] = &root_pgtable[ret];
> >>> +#endif
> >>>
> >>>        /*
> >>>         * TTBR0
> >>> @@ -615,6 +857,9 @@ static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain)
> >>>        ipmmu_ctx_write_root(domain, IMCTR, IMCTR_FLUSH);
> >>>        ipmmu_tlb_sync(domain);
> >>>
> >>> +#ifdef CONFIG_SYSTEM_SUSPEND
> >>> +    domain->mmu->root->reg_backup[domain->context_id] = NULL;
> >>> +#endif
> >>>        ipmmu_domain_free_context(domain->mmu->root, domain->context_id);
> >>>    }
> >>>
> >>> @@ -1427,6 +1672,14 @@ static int ipmmu_add_device(u8 devfn, struct device *dev)
> >>>        }
> >>>    #endif
> >>>
> >>> +#ifdef CONFIG_SYSTEM_SUSPEND
> >>> +    if ( ipmmu_alloc_ctx_suspend(dev) )
> >>> +    {
> >>> +        dev_err(dev, "Failed to allocate context for suspend\n");
> >>> +        return -ENOMEM;
> >>> +    }
> >>> +#endif
> >>
> >> ... The initial version was based on the driver code without PCI
> >> support, but that support is now present. There is PCI-specific code
> >> earlier in this function (not visible in the context) that performs some
> >> initialization, allocation and device assignment. What I mean is that if
> >> the suspend context allocation fails here, we will need to undo those
> >> actions (i.e. deassign the device). I would move this context allocation
> >> (whose probability of failing is much lower than that of the PCI device
> >> setup) above the PCI-specific code, and perform the context freeing on
> >> the error path.
> >
> > Maybe it would be better just to add some checks to the suspend handler.
> > We could skip suspend if the context is not available, and avoid having
> > to deallocate the previously allocated resources here. This is similar
> > to what is done for the GICs.
> >
> > What do you think? Or do you prefer to destroy everything related to the
> > IOMMU here on error?
>
> I would prefer that we fail early here in ipmmu_add_device (and roll back
> the changes) rather than continue and fail later; other people might think
> differently. I think that if we cannot even allocate memory for these
> structures, the situation is already bad.

Got it, I’ll fix this in the next version of the patch series.
Thank you for pointing that out.
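
For reference, here is a rough sketch of what I plan for the next version
(only a sketch, not the actual patch): move ipmmu_alloc_ctx_suspend() above
the PCI-specific setup in ipmmu_add_device() and undo it on the error path.
The ipmmu_free_ctx_suspend() helper below is hypothetical and only
illustrates the intended rollback:

#ifdef CONFIG_SYSTEM_SUSPEND
/*
 * Hypothetical counterpart of ipmmu_alloc_ctx_suspend(): unlink the backup
 * entry for this device and free its arrays.
 */
static void ipmmu_free_ctx_suspend(struct device *dev)
{
    struct ipmmu_vmsa_backup *backup_data, *tmp;

    spin_lock(&ipmmu_devices_backup_lock);

    list_for_each_entry_safe( backup_data, tmp, &ipmmu_devices_backup, list )
    {
        if ( backup_data->dev != dev )
            continue;

        list_del(&backup_data->list);
        xfree(backup_data->utlbs_val);
        xfree(backup_data->asids_val);
        xfree(backup_data);
        break;
    }

    spin_unlock(&ipmmu_devices_backup_lock);
}
#endif

Then in ipmmu_add_device(), roughly:

#ifdef CONFIG_SYSTEM_SUSPEND
    /* Allocate early: nothing to roll back yet if this fails. */
    if ( ipmmu_alloc_ctx_suspend(dev) )
    {
        dev_err(dev, "Failed to allocate context for suspend\n");
        return -ENOMEM;
    }
#endif

    /*
     * The PCI-specific initialization / device assignment (already in the
     * driver) would follow here; its error path would then call
     * ipmmu_free_ctx_suspend(dev) before returning.
     */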

>
>
>
> >
> >>
> >>> +
> >>>        dev_info(dev, "Added master device (IPMMU %s micro-TLBs %u)\n",
> >>>                 dev_name(fwspec->iommu_dev), fwspec->num_ids);
> >>>
> >>> @@ -1492,6 +1745,10 @@ static const struct iommu_ops ipmmu_iommu_ops =
> >>>        .unmap_page      = arm_iommu_unmap_page,
> >>>        .dt_xlate        = ipmmu_dt_xlate,
> >>>        .add_device      = ipmmu_add_device,
> >>> +#ifdef CONFIG_SYSTEM_SUSPEND
> >>> +    .suspend         = ipmmu_suspend,
> >>> +    .resume          = ipmmu_resume,
> >>> +#endif
> >>>    };
> >>>
> >>>    static __init int ipmmu_init(struct dt_device_node *node, const void *data)
> >
> > Best regards,
> > Mykola
> >
>

Best regards,
Mykola
