Hi Oleksandr,

On Wed, Sep 3, 2025 at 1:01 PM Oleksandr Tyshchenko
<oleksandr_tyshche...@epam.com> wrote:
>
>
>
> On 02.09.25 01:10, Mykola Kvach wrote:
>
> Hello Mykola
>
>
> > From: Oleksandr Tyshchenko <oleksandr_tyshche...@epam.com>
> >
> > Store and restore active context and micro-TLB registers.
> >
> > Tested on R-Car H3 Starter Kit.
> >
> > Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshche...@epam.com>
> > Signed-off-by: Mykola Kvach <mykola_kv...@epam.com>
> > ---
> > Changes in V6:
> > - refactor code related to the hw_register struct; it is now called
> >    ipmmu_reg_ctx
>
> The updated version looks good, thanks. However, I have one
> concern/request ...
>
> > ---
> >   xen/drivers/passthrough/arm/ipmmu-vmsa.c | 257 +++++++++++++++++++++++
> >   1 file changed, 257 insertions(+)
> >
> > diff --git a/xen/drivers/passthrough/arm/ipmmu-vmsa.c b/xen/drivers/passthrough/arm/ipmmu-vmsa.c
> > index ea9fa9ddf3..0973559861 100644
> > --- a/xen/drivers/passthrough/arm/ipmmu-vmsa.c
> > +++ b/xen/drivers/passthrough/arm/ipmmu-vmsa.c
> > @@ -71,6 +71,8 @@
> >   })
> >   #endif
> >
> > +#define dev_dbg(dev, fmt, ...)    \
> > +    dev_print(dev, XENLOG_DEBUG, fmt, ## __VA_ARGS__)
> >   #define dev_info(dev, fmt, ...)    \
> >       dev_print(dev, XENLOG_INFO, fmt, ## __VA_ARGS__)
> >   #define dev_warn(dev, fmt, ...)    \
> > @@ -130,6 +132,24 @@ struct ipmmu_features {
> >       unsigned int imuctr_ttsel_mask;
> >   };
> >
> > +#ifdef CONFIG_SYSTEM_SUSPEND
> > +
> > +struct ipmmu_reg_ctx {
> > +    unsigned int imttlbr0;
> > +    unsigned int imttubr0;
> > +    unsigned int imttbcr;
> > +    unsigned int imctr;
> > +};
> > +
> > +struct ipmmu_vmsa_backup {
> > +    struct device *dev;
> > +    unsigned int *utlbs_val;
> > +    unsigned int *asids_val;
> > +    struct list_head list;
> > +};
> > +
> > +#endif
> > +
> >   /* Root/Cache IPMMU device's information */
> >   struct ipmmu_vmsa_device {
> >       struct device *dev;
> > @@ -142,6 +162,9 @@ struct ipmmu_vmsa_device {
> >       struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX];
> >       unsigned int utlb_refcount[IPMMU_UTLB_MAX];
> >       const struct ipmmu_features *features;
> > +#ifdef CONFIG_SYSTEM_SUSPEND
> > +    struct ipmmu_reg_ctx *reg_backup[IPMMU_CTX_MAX];
> > +#endif
> >   };
> >
> >   /*
> > @@ -547,6 +570,222 @@ static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu,
> >       spin_unlock_irqrestore(&mmu->lock, flags);
> >   }
> >
> > +#ifdef CONFIG_SYSTEM_SUSPEND
> > +
> > +static DEFINE_SPINLOCK(ipmmu_devices_backup_lock);
> > +static LIST_HEAD(ipmmu_devices_backup);
> > +
> > +static struct ipmmu_reg_ctx root_pgtable[IPMMU_CTX_MAX];
> > +
> > +static uint32_t ipmmu_imuasid_read(struct ipmmu_vmsa_device *mmu,
> > +                                   unsigned int utlb)
> > +{
> > +    return ipmmu_read(mmu, ipmmu_utlb_reg(mmu, IMUASID(utlb)));
> > +}
> > +
> > +static void ipmmu_utlbs_backup(struct ipmmu_vmsa_device *mmu)
> > +{
> > +    struct ipmmu_vmsa_backup *backup_data;
> > +
> > +    dev_dbg(mmu->dev, "Handle micro-TLBs backup\n");
> > +
> > +    spin_lock(&ipmmu_devices_backup_lock);
> > +
> > +    list_for_each_entry( backup_data, &ipmmu_devices_backup, list )
> > +    {
> > +        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(backup_data->dev);
> > +        unsigned int i;
> > +
> > +        if ( to_ipmmu(backup_data->dev) != mmu )
> > +            continue;
> > +
> > +        for ( i = 0; i < fwspec->num_ids; i++ )
> > +        {
> > +            unsigned int utlb = fwspec->ids[i];
> > +
> > +            backup_data->asids_val[i] = ipmmu_imuasid_read(mmu, utlb);
> > +            backup_data->utlbs_val[i] = ipmmu_imuctr_read(mmu, utlb);
> > +        }
> > +    }
> > +
> > +    spin_unlock(&ipmmu_devices_backup_lock);
> > +}
> > +
> > +static void ipmmu_utlbs_restore(struct ipmmu_vmsa_device *mmu)
> > +{
> > +    struct ipmmu_vmsa_backup *backup_data;
> > +
> > +    dev_dbg(mmu->dev, "Handle micro-TLBs restore\n");
> > +
> > +    spin_lock(&ipmmu_devices_backup_lock);
> > +
> > +    list_for_each_entry( backup_data, &ipmmu_devices_backup, list )
> > +    {
> > +        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(backup_data->dev);
> > +        unsigned int i;
> > +
> > +        if ( to_ipmmu(backup_data->dev) != mmu )
> > +            continue;
> > +
> > +        for ( i = 0; i < fwspec->num_ids; i++ )
> > +        {
> > +            unsigned int utlb = fwspec->ids[i];
> > +
> > +            ipmmu_imuasid_write(mmu, utlb, backup_data->asids_val[i]);
> > +            ipmmu_imuctr_write(mmu, utlb, backup_data->utlbs_val[i]);
> > +        }
> > +    }
> > +
> > +    spin_unlock(&ipmmu_devices_backup_lock);
> > +}
> > +
> > +static void ipmmu_domain_backup_context(struct ipmmu_vmsa_domain *domain)
> > +{
> > +    struct ipmmu_vmsa_device *mmu = domain->mmu->root;
> > +    struct ipmmu_reg_ctx *regs = mmu->reg_backup[domain->context_id];
> > +
> > +    dev_dbg(mmu->dev, "Handle domain context %u backup\n", domain->context_id);
> > +
> > +    regs->imttlbr0 = ipmmu_ctx_read_root(domain, IMTTLBR0);
> > +    regs->imttubr0 = ipmmu_ctx_read_root(domain, IMTTUBR0);
> > +    regs->imttbcr  = ipmmu_ctx_read_root(domain, IMTTBCR);
> > +    regs->imctr    = ipmmu_ctx_read_root(domain, IMCTR);
> > +}
> > +
> > +static void ipmmu_domain_restore_context(struct ipmmu_vmsa_domain *domain)
> > +{
> > +    struct ipmmu_vmsa_device *mmu = domain->mmu->root;
> > +    struct ipmmu_reg_ctx *regs  = mmu->reg_backup[domain->context_id];
> > +
> > +    dev_dbg(mmu->dev, "Handle domain context %u restore\n", domain->context_id);
> > +
> > +    ipmmu_ctx_write_root(domain, IMTTLBR0, regs->imttlbr0);
> > +    ipmmu_ctx_write_root(domain, IMTTUBR0, regs->imttubr0);
> > +    ipmmu_ctx_write_root(domain, IMTTBCR,  regs->imttbcr);
> > +    ipmmu_ctx_write_all(domain,  IMCTR,    regs->imctr | IMCTR_FLUSH);
> > +}
> > +
> > +/*
> > + * Xen: Unlike Linux implementation, Xen uses a single driver instance
> > + * for handling all IPMMUs. There is no framework for ipmmu_suspend/resume
> > + * callbacks to be invoked for each IPMMU device. So, we need to iterate
> > + * through all registered IPMMUs performing required actions.
> > + *
> > + * Also take care of restoring special settings, such as translation
> > + * table format, etc.
> > + */
> > +static int __must_check ipmmu_suspend(void)
> > +{
> > +    struct ipmmu_vmsa_device *mmu;
> > +
> > +    if ( !iommu_enabled )
> > +        return 0;
> > +
> > +    printk(XENLOG_DEBUG "ipmmu: Suspending ...\n");
> > +
> > +    spin_lock(&ipmmu_devices_lock);
> > +
> > +    list_for_each_entry( mmu, &ipmmu_devices, list )
> > +    {
> > +        if ( ipmmu_is_root(mmu) )
> > +        {
> > +            unsigned int i;
> > +
> > +            for ( i = 0; i < mmu->num_ctx; i++ )
> > +            {
> > +                if ( !mmu->domains[i] )
> > +                    continue;
> > +                ipmmu_domain_backup_context(mmu->domains[i]);
> > +            }
> > +        }
> > +        else
> > +            ipmmu_utlbs_backup(mmu);
> > +    }
> > +
> > +    spin_unlock(&ipmmu_devices_lock);
> > +
> > +    return 0;
> > +}
> > +
> > +static void ipmmu_resume(void)
> > +{
> > +    struct ipmmu_vmsa_device *mmu;
> > +
> > +    if ( !iommu_enabled )
> > +        return;
> > +
> > +    printk(XENLOG_DEBUG "ipmmu: Resuming ...\n");
> > +
> > +    spin_lock(&ipmmu_devices_lock);
> > +
> > +    list_for_each_entry( mmu, &ipmmu_devices, list )
> > +    {
> > +        uint32_t reg;
> > +
> > +        /* Do not use security group function */
> > +        reg = IMSCTLR + mmu->features->control_offset_base;
> > +        ipmmu_write(mmu, reg, ipmmu_read(mmu, reg) & ~IMSCTLR_USE_SECGRP);
> > +
> > +        if ( ipmmu_is_root(mmu) )
> > +        {
> > +            unsigned int i;
> > +
> > +            /* Use stage 2 translation table format */
> > +            reg = IMSAUXCTLR + mmu->features->control_offset_base;
> > +            ipmmu_write(mmu, reg, ipmmu_read(mmu, reg) | IMSAUXCTLR_S2PTE);
> > +
> > +            for ( i = 0; i < mmu->num_ctx; i++ )
> > +            {
> > +                if ( !mmu->domains[i] )
> > +                    continue;
> > +                ipmmu_domain_restore_context(mmu->domains[i]);
> > +            }
> > +        }
> > +        else
> > +            ipmmu_utlbs_restore(mmu);
> > +    }
> > +
> > +    spin_unlock(&ipmmu_devices_lock);
> > +}
> > +
> > +static int ipmmu_alloc_ctx_suspend(struct device *dev)
> > +{
> > +    struct ipmmu_vmsa_backup *backup_data;
> > +    unsigned int *utlbs_val, *asids_val;
> > +    struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> > +
> > +    utlbs_val = xzalloc_array(unsigned int, fwspec->num_ids);
> > +    if ( !utlbs_val )
> > +        return -ENOMEM;
> > +
> > +    asids_val = xzalloc_array(unsigned int, fwspec->num_ids);
> > +    if ( !asids_val )
> > +    {
> > +        xfree(utlbs_val);
> > +        return -ENOMEM;
> > +    }
> > +
> > +    backup_data = xzalloc(struct ipmmu_vmsa_backup);
> > +    if ( !backup_data )
> > +    {
> > +        xfree(utlbs_val);
> > +        xfree(asids_val);
> > +        return -ENOMEM;
> > +    }
> > +
> > +    backup_data->dev = dev;
> > +    backup_data->utlbs_val = utlbs_val;
> > +    backup_data->asids_val = asids_val;
> > +
> > +    spin_lock(&ipmmu_devices_backup_lock);
> > +    list_add(&backup_data->list, &ipmmu_devices_backup);
> > +    spin_unlock(&ipmmu_devices_backup_lock);
> > +
> > +    return 0;
> > +}
> > +
> > +#endif /* CONFIG_SYSTEM_SUSPEND */
> > +
> >   static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
> >   {
> >       uint64_t ttbr;
> > @@ -559,6 +798,9 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
> >           return ret;
> >
> >       domain->context_id = ret;
> > +#ifdef CONFIG_SYSTEM_SUSPEND
> > +    domain->mmu->root->reg_backup[ret] = &root_pgtable[ret];
> > +#endif
> >
> >       /*
> >        * TTBR0
> > @@ -615,6 +857,9 @@ static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain)
> >       ipmmu_ctx_write_root(domain, IMCTR, IMCTR_FLUSH);
> >       ipmmu_tlb_sync(domain);
> >
> > +#ifdef CONFIG_SYSTEM_SUSPEND
> > +    domain->mmu->root->reg_backup[domain->context_id] = NULL;
> > +#endif
> >       ipmmu_domain_free_context(domain->mmu->root, domain->context_id);
> >   }
> >
> > @@ -1427,6 +1672,14 @@ static int ipmmu_add_device(u8 devfn, struct device *dev)
> >       }
> >   #endif
> >
> > +#ifdef CONFIG_SYSTEM_SUSPEND
> > +    if ( ipmmu_alloc_ctx_suspend(dev) )
> > +    {
> > +        dev_err(dev, "Failed to allocate context for suspend\n");
> > +        return -ENOMEM;
> > +    }
> > +#endif
>
> ... The initial version was based on the driver code without PCI
> support, but that support is now present. There is PCI-specific code
> above in this function (not visible in the diff context) that performs
> some initialization, allocation and device assignment. What I mean is
> that if the suspend context allocation fails here, we will need to undo
> those actions (i.e. deassign the device). I would move this context
> allocation (which is much less likely to fail than the PCI device
> setup) above the PCI-specific code, and free the context on the
> error path.

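If I understand the suggestion correctly, it would look roughly like this
(rough sketch only; pci_specific_setup() is just a placeholder for the
existing PCI handling, and ipmmu_free_ctx_suspend() is a hypothetical
helper that would remove the entry from ipmmu_devices_backup and free
its arrays):

    #ifdef CONFIG_SYSTEM_SUSPEND
        /* Do the cheap, unlikely-to-fail allocation first ... */
        if ( ipmmu_alloc_ctx_suspend(dev) )
        {
            dev_err(dev, "Failed to allocate context for suspend\n");
            return -ENOMEM;
        }
    #endif

        /* ... then the PCI-specific init, allocation and assignment. */
        ret = pci_specific_setup(dev); /* placeholder for the existing code */
        if ( ret )
        {
    #ifdef CONFIG_SYSTEM_SUSPEND
            /* Undo the suspend context allocation on the error path. */
            ipmmu_free_ctx_suspend(dev);
    #endif
            return ret;
        }
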
Maybe it would be better to just add some checks to the suspend handler
instead: skip the suspend if a backup context is not available, and avoid
having to deallocate everything that was previously allocated. This is
similar to what is done for the GICs.
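
Something along these lines (untested sketch; the backup_incomplete flag
is hypothetical and would be set when ipmmu_alloc_ctx_suspend() fails in
ipmmu_add_device(), instead of failing the device addition):

    /* Untested sketch: refuse to suspend rather than undoing the
     * device assignment at add_device time. */
    static bool backup_incomplete;

    static int __must_check ipmmu_suspend(void)
    {
        if ( !iommu_enabled )
            return 0;

        if ( backup_incomplete )
        {
            printk(XENLOG_WARNING
                   "ipmmu: Backup context missing for some devices, cannot suspend\n");
            return -ENOSYS;
        }

        /* ... existing backup logic ... */
        return 0;
    }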

What do you think? Or do you prefer to destroy everything related to the
IOMMU here on error?

>
> > +
> >       dev_info(dev, "Added master device (IPMMU %s micro-TLBs %u)\n",
> >                dev_name(fwspec->iommu_dev), fwspec->num_ids);
> >
> > @@ -1492,6 +1745,10 @@ static const struct iommu_ops ipmmu_iommu_ops =
> >       .unmap_page      = arm_iommu_unmap_page,
> >       .dt_xlate        = ipmmu_dt_xlate,
> >       .add_device      = ipmmu_add_device,
> > +#ifdef CONFIG_SYSTEM_SUSPEND
> > +    .suspend         = ipmmu_suspend,
> > +    .resume          = ipmmu_resume,
> > +#endif
> >   };
> >
> > static __init int ipmmu_init(struct dt_device_node *node, const void *data)

Best regards,
Mykola
